diff --git a/CMakeLists.txt b/CMakeLists.txt
index c01c54d..aba8597 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,150 +1,150 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 cmake_minimum_required (VERSION 2.8.8)
 #cmake_policy(SET CMP0053 NEW)
 
 project(bsplines_root Fortran C)
 
 #enable_language(Fortran)
 enable_testing()
 
 option(BSPLINES_USE_MUMPS "Activate the mumps interface" ON)
 if(NOT DEFINED BSPLINES_EXAMPLES)
   option(BSPLINES_EXAMPLES "Compiles the examples" ON)
 endif()
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake")
 
 # Assume we are on CRAY if ftn is the Fortran compiler
 if (${CMAKE_Fortran_COMPILER} MATCHES "ftn$")
   set(CRAY TRUE)
   if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
     set(cray_suffix cray)
   elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
     set(cray_suffix pgi)
   elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
     set(cray_suffix intel)
   elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
     set(cray_suffix gnu)
   endif()
 else()
   set(CRAY FALSE)
 endif()
 
 if(POLICY CMP0074)
   cmake_policy(SET CMP0074 NEW)
 endif()
 
 
 include(CMakeFlagsHandling)
 # Compiler flags for debug/optimization
 if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
 elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
   set(CMAKE_AR ${XIAR})
   add_flags(LANG Fortran TYPE DEBUG -traceback "-check bounds" "-warn unused")
   add_flags(LANG Fortran TYPE RELEASE -xHost)
 elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
   add_flags(LANG Fortran TYPE DEBUG -fbounds-check -fbacktrace)
 endif()
 
 if(NOT MUMPS)
   set(MUMPS $ENV{MUMPS_ROOT})
 endif()
 
 # Installation root directory
 if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
   set(PREFIX $ENV{PREFIX})
   if(PREFIX)
     set(${CMAKE_INSTALL_PREFIX} ${PREFIX})
   else()
     set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "..." FORCE)
   endif()
   message(STATUS "CMAKE_INSTALL_PREFIX is " ${CMAKE_INSTALL_PREFIX})
 endif()
 
 # Search and load the FUTILS configuration file
 if(NOT TARGET futils)
   find_package(futils PATHS ${FUTILS}/lib/cmake REQUIRED)
 endif()
 
 if(BSPLINES_USE_MUMPS)
   find_package(Mumps REQUIRED)
   set(HAS_MUMPS ${MUMPS_FOUND})
 else()
   set(HAS_MUMPS FALSE)
 endif()
 
 # Find lapack/blas. Skip it if on CRAY!
 if(CRAY)
   set(BSPLINES_USE_PARDISO OFF)
 endif()
 include(blas)
 
 if(NOT BSPLINES_EXPORT_TARGETS)
   set(BSPLINES_EXPORT_TARGETS bsplines-targets)
 endif()
 
 find_package(MPI COMPONENTS Fortran REQUIRED)
 
 include(GNUInstallDirs)
 
 add_subdirectory(pppack)
 add_subdirectory(pputils2)
 add_subdirectory(fft)
 add_subdirectory(src)
 
 if(HAS_MUMPS AND BSPLINES_EXAMPLES)
   add_subdirectory(multigrid)
 endif()
 
 if(BSPLINES_EXAMPLES)
   add_subdirectory(examples)
   add_subdirectory(wk)
 endif()
 
 export(TARGETS pppack pputils2 bsplines fft
   FILE "${CMAKE_BINARY_DIR}/bsplinesLibraryDepends.cmake")
 export(PACKAGE bsplines)
 
 # install configuration files
 if(BSPLINES_EXPORT_TARGETS MATCHES "bsplines-targets")
   install(EXPORT bsplines-targets
     DESTINATION lib/cmake
     )
 
   configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/cmake/bsplines-config.cmake.in
     ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake @ONLY
     )
   install(FILES
     ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake
     DESTINATION lib/cmake
     )
 endif()
 
 # enable packaging with CPack
 include(CPack)
diff --git a/cmake/CMakeFlagsHandling.cmake b/cmake/CMakeFlagsHandling.cmake
index d9955fe..ccab47d 100644
--- a/cmake/CMakeFlagsHandling.cmake
+++ b/cmake/CMakeFlagsHandling.cmake
@@ -1,100 +1,100 @@
 #
 # @file CMakeFlagsHandling.cmake
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 #
 if(_CMAKE_FLAGS_HANDLING)
   return()
 endif()
 set(_CMAKE_FLAGS_HANDLING TRUE)
 
 #===============================================================================
 # Compilation options handling
 #===============================================================================
 macro(_get_flags_message lang type desc)
   if(${lang} MATCHES "C.." OR ${lang} MATCHES "Fortran")
     set(${desc} "Flags used by the compiler")
   elseif(${lang} MATCHES ".*_LINKER")
     set(${desc} "Flags used by the linker")
   endif()
 
   if(${lang} MATCHES "SHARED_LINKER")
     set(${desc} "${desc} during the creation of shared libraries")
   elseif(${lang} MATCHES "MODULE_LINKER")
     set(${desc} "${desc} during the creation of modules")
   elseif(${lang} MATCHES "STATIC_LINKER")
     set(${desc} "${desc} linker during the creation of static libraries")
   endif()
 
   if(${type} MATCHES "ALL")
     set(${desc} "${desc} during all build types")
   else()
     set(${desc} "${desc} during ${type} builds")
   endif()
 endmacro()
 
 #===============================================================================
 
 function(handle_flags)
   include(CMakeParseArguments)
   cmake_parse_arguments(_flags
     "ADD;REMOVE" "LANG;TYPE" ""
     ${ARGN}
     )
 
   if(NOT _flags_LANG)
     set(_flags_LANG ${FLAGS_HANDLING_DEFAULT_LANGUAGE})
   endif()
 
   set(_variable CMAKE_${_flags_LANG}_FLAGS)
 
   if (_flags_TYPE)
     set(_variable ${_variable}_${_flags_TYPE})
   else()
     set(_flags_TYPE "ALL")
   endif()
 
   _get_flags_message(${_flags_LANG} ${_flags_TYPE} _desc)
   foreach(flag ${_flags_UNPARSED_ARGUMENTS})
     if (_flags_ADD)
       string(REPLACE "${flag}" "match" _temp_var "${${_variable}}")
       if(NOT _temp_var MATCHES "match")
         set(${_variable} "${flag} ${${_variable}}" CACHE STRING ${_desc} FORCE)
       endif()
     elseif(_flags_REMOVE)
       string(REPLACE "${flag} " "" ${_variable} "${${_variable}}")
       set(${_variable} "${${_variable}}" CACHE STRING ${_desc} FORCE)
     endif()
   endforeach()
 endfunction()
 
 #===============================================================================
 function(add_flags)
   handle_flags(ADD ${ARGN})
 endfunction()
 
 #===============================================================================
 function(remove_flags)
   handle_flags(REMOVE ${ARGN})
 endfunction()
 #===============================================================================
diff --git a/cmake/CheckFindMumps.c b/cmake/CheckFindMumps.c
index 3b006f7..b402816 100644
--- a/cmake/CheckFindMumps.c
+++ b/cmake/CheckFindMumps.c
@@ -1,105 +1,105 @@
 /**
  * @file CheckFindMumps.c
  *
  * @brief
  *
  * @copyright
  * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
  * SPC (Swiss Plasma Center)
  *
- * spclibs is free software: you can redistribute it and/or modify it under
+ * SPClibs is free software: you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
  * Software Foundation, either version 3 of the License, or (at your option)
  * any later version.
  *
- * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  *
  * @authors
  * (in alphabetical order)
  * @author Nicolas Richart <nicolas.richart@epfl.ch>
  */
 #include <stdio.h>
 
 #if !defined(MUMPS_SEQ)
 #  include <mpi.h>
 #endif
 
 #define JOB_INIT -1
 #define JOB_END -2
 #define JOB_COMPLETE 6
 #define USE_COMM_WORLD -987654
 
 #define icntl(n) id.icntl[n - 1]
 
 int main(int argc, char **argv) {
   int n = 2;
   int nz = 2;
 
   int irn[2] = {1, 2};
   int jcn[2] = {1, 2};
   Real a[2];
   Real rhs[2];
 
 #if !defined(MUMPS_SEQ)
   MPI_Init(&argc, &argv);
 #endif
 
   rhs[0] = 1.0; rhs[1]=4.0;
   a[0] = 1.0; a[1] = 2.0;
 
   id.job = JOB_INIT;
   id.par = 1;
   id.sym = 0;
 
 #if !defined(MUMPS_SEQ)
   id.comm_fortran = USE_COMM_WORLD;
 #endif
 
   mumps_c(&id);
 
   // Default Scaling
   icntl(8) = 77;
 
   // Assembled matrix
   icntl(5) = 0;
 
   /// Default centralized dense second member
   icntl(20) = 0;
   icntl(21) = 0;
 
   // automatic choice for analysis analysis
   icntl(28) = 0;
 
   // fully distributed
   icntl(18) = 3;
 
   id.n = n;
 
   id.nz_loc = nz;
   id.irn_loc = irn;
   id.jcn_loc = jcn;
 
   id.a_loc = a;
   id.rhs = rhs;
 
   icntl(1) = -1;
   icntl(2) = -1;
   icntl(3) = -1;
   icntl(4) = 0;
 
 
   id.job = JOB_COMPLETE;
   mumps_c(&id);
 
   id.job=JOB_END;
   mumps_c(&id);
 
   printf("Solution is : (%8.2f %8.2f)\n", rhs[0], rhs[1]);
 
   return 0;
 }
diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake
index 855fa81..7abde8a 100644
--- a/cmake/FindFFTW.cmake
+++ b/cmake/FindFFTW.cmake
@@ -1,46 +1,46 @@
 #
 # @file FindFFTW.cmake
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 #
 if(CMAKE_C_COMPILER_ID MATCHES "Cray")
   set(_cray TRUE)
 endif()
 
 # Find FFTW2
 if (${_cray})
 #  set(FFTW_LIBRARY "-ldfftw")
   set(FFTW_LIBRARY "${FFTW_DIR}/libdfftw.a")
 else()
   find_library(FFTW_LIBRARY NAMES fftw PATHS ${FFTW}/lib)
   find_library(FFTW_LIBRARY NAMES fftw3 PATHS ${FFTW}/lib)
   find_path(FFTW_INCLUDES fftw_f77.h ${FFTW}/include)
   find_path(FFTW_INCLUDES fftw_f77.i ${FFTW}/include)
   find_path(FFTW_INCLUDES fftw.h ${FFTW}/include)
   find_path(FFTW_INCLUDES fftw3.h ${FFTW}/include)
 endif()
 
 mark_as_advanced(FFTW_LIBRARY FFTW_INCLUDES)
 
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARY)
diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake
index 879b0a9..f194320 100644
--- a/cmake/FindMETIS.cmake
+++ b/cmake/FindMETIS.cmake
@@ -1,62 +1,62 @@
 #
 # @file FindMETIS.cmake
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 #
 find_path(METIS_INCLUDE_DIR metis.h
   PATHS "${METIS_DIR}"
   ENV METIS_DIR
   PATH_SUFFIXES include
   )
 
 find_library(METIS_LIBRARY NAMES metis
   PATHS "${METIS_DIR}"
   ENV METIS_DIR
   PATH_SUFFIXES lib
   )
 
 mark_as_advanced(METIS_LIBRARY METIS_INCLUDE_DIR)
 
 #===============================================================================
 include(FindPackageHandleStandardArgs)
 if(CMAKE_VERSION VERSION_GREATER 2.8.12)
   if(METIS_INCLUDE_DIR)
     file(STRINGS ${METIS_INCLUDE_DIR}/metis.h _versions
       REGEX "^#define\ +METIS_VER_(MAJOR|MINOR|SUBMINOR) .*")
     foreach(_ver ${_versions})
       string(REGEX MATCH "METIS_VER_(MAJOR|MINOR|SUBMINOR) *([0-9.]+)" _tmp "${_ver}")
       set(_metis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
     endforeach()
     set(METIS_VERSION "${_metis_MAJOR}.${_metis_MINOR}" CACHE INTERNAL "")
   endif()
 
   find_package_handle_standard_args(METIS
     REQUIRED_VARS
       METIS_LIBRARY
       METIS_INCLUDE_DIR
     VERSION_VAR
       METIS_VERSION)
 else()
   find_package_handle_standard_args(METIS DEFAULT_MSG
     METIS_LIBRARY METIS_INCLUDE_DIR)
 endif()
diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake
index 67cfc35..27a5299 100644
--- a/cmake/FindPETSc.cmake
+++ b/cmake/FindPETSc.cmake
@@ -1,92 +1,92 @@
 #
 # @file FindPETSc.cmake
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 #
 # - Try to find PETSc
 #  PETSC_FOUND         - system has PETSc
 #  PETSC_INCLUDE_DIRS  - the PETSc include directories
 #  PETSC_LIBRARIES     - Link these to use PETSc
 #  PETSC_VERSION       - Version string (MAJOR.MINOR.SUBMINOR)
 
 if(PETSc_FIND_REQUIRED)
   find_package(PkgConfig REQUIRED)
 else()
   find_package(PkgConfig QUIET)
   if(NOT PKG_CONFIG_FOUND)
     return()
   endif()
 endif()
 
 pkg_search_module(_petsc PETSc)
 
 # Some debug code
 #get_property(_vars DIRECTORY PROPERTY VARIABLES)
 #foreach(_var ${_vars})
 #  if ("${_var}" MATCHES "^_petsc")
 #    message("${_var} -> ${${_var}}")
 #  endif()
 #endforeach()
 
 if(_petsc_FOUND AND _petsc_VERSION)
   set(PETSC_VERSION ${_petsc_VERSION})
 endif()
 
 if(_petsc_FOUND)
   set(_petsc_libs)
   foreach(_lib ${_petsc_LIBRARIES})
     string(TOUPPER "${_lib}" _u_lib)
     find_library(PETSC_LIBRARY_${_u_lib} ${_lib} PATHS ${_petsc_LIBRARY_DIRS})
     list(APPEND _petsc_libs ${PETSC_LIBRARY_${_u_lib}})
     mark_as_advanced(PETSC_LIBRARY_${_u_lib})
   endforeach()
 
   if (NOT _petsc_INCLUDE_DIRS)
     pkg_get_variable(_petsc_INCLUDE_DIRS ${_petsc_MODULE_NAME} includedir)
     #message(${_petsc_INCLUDE_DIRS})
   endif()
 
   find_path(PETSC_Fortran_INCLUDE_DIRS "finclude/petsc.h"
     PATHS ${_petsc_INCLUDE_DIRS}/petsc
     NO_CMAKE_PATH
     NO_DEFAULT_PATH
     )
 
   set(PETSC_LIBRARIES ${_petsc_libs} CACHE FILEPATH "")
   set(PETSC_INCLUDE_DIRS ${_petsc_INCLUDE_DIRS} CACHE PATH "")
 
 
   add_library(petsc::petsc INTERFACE IMPORTED)
   set_property(TARGET petsc::petsc PROPERTY INTERFACE_LINK_LIBRARIES ${PETSC_LIBRARIES})
   set_property(TARGET petsc::petsc PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_INCLUDE_DIRS})
 
   add_library(petsc::petscf INTERFACE IMPORTED)
   target_link_libraries(petsc::petscf INTERFACE petsc::petsc)
   set_property(TARGET petsc::petscf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_Fortran_INCLUDE_DIRS})
 
 endif()
 
 include (FindPackageHandleStandardArgs)
 find_package_handle_standard_args(PETSc
   REQUIRED_VARS PETSC_LIBRARIES PETSC_INCLUDE_DIRS
   VERSION_VAR PETSC_VERSION)
diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake
index 1f0c0df..f0c5425 100644
--- a/cmake/FindParMETIS.cmake
+++ b/cmake/FindParMETIS.cmake
@@ -1,62 +1,62 @@
 #
 # @file FindParMETIS.cmake
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 #
 find_path(PARMETIS_INCLUDE_DIR parmetis.h
   PATHS "${PARMETIS_DIR}"
   ENV PARMETIS_DIR
   PATH_SUFFIXES include
   )
 
 find_library(PARMETIS_LIBRARY NAMES parmetis
   PATHS "${PARMETIS_DIR}"
   ENV PARMETIS_DIR
   PATH_SUFFIXES lib
   )
 
 mark_as_advanced(PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR)
 
 #===============================================================================
 include(FindPackageHandleStandardArgs)
 if(CMAKE_VERSION VERSION_GREATER 2.8.12)
   if(PARMETIS_INCLUDE_DIR)
     file(STRINGS ${PARMETIS_INCLUDE_DIR}/parmetis.h _versions
       REGEX "^#define\ +PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION .*")
     foreach(_ver ${_versions})
       string(REGEX MATCH "PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION *([0-9.]+)" _tmp "${_ver}")
       set(_parmetis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
     endforeach()
     set(PARMETIS_VERSION "${_parmetis_MAJOR}.${_parmetis_MINOR}" CACHE INTERNAL "")
   endif()
 
   find_package_handle_standard_args(ParMETIS
     REQUIRED_VARS
       PARMETIS_LIBRARY
       PARMETIS_INCLUDE_DIR
     VERSION_VAR
       PARMETIS_VERSION)
 else()
   find_package_handle_standard_args(ParMETIS DEFAULT_MSG
     PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR)
 endif()
diff --git a/cmake/blas.cmake b/cmake/blas.cmake
index f4ef4d5..00fa014 100644
--- a/cmake/blas.cmake
+++ b/cmake/blas.cmake
@@ -1,63 +1,63 @@
 #
 # @file blas.cmake
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 #
 set(_default_blas $ENV{BLA_VENDOR})
 if(NOT _default_blas)
   set(_default_blas All)
 endif()
 set(BSPLINES_USE_BLAS_VENDOR "${_default_blas}" CACHE STRING "Version of blas to use")
 mark_as_advanced(BSPLINES_USE_BLAS_VENDOR)
 set_property(CACHE BSPLINES_USE_BLAS_VENDOR PROPERTY STRINGS
   All
   ACML
   ACML_GPU
   ACML_MP
   ATLAS
   Apple
   CXML
   DXML
   Generic
   Goto
   IBMESSL
   Intel
   Intel10_32
   Intel10_64lp
   Intel10_64lp_seq
   NAS
   OpenBLAS
   PhiPACK
   SCSL
   SGIMATH
   SunPerf
   )
 
 if(BSPLINES_USE_PARDISO)
   set(BSPLINES_USE_BLAS_VENDOR Intel10_64lp CACHE STRING "" INTERNAL)
 endif()
 
 set(ENV{BLA_VENDOR} ${BSPLINES_USE_BLAS_VENDOR})
 
 find_package(BLAS REQUIRED)
 find_package(LAPACK REQUIRED)
diff --git a/cmake/bsplines-config.cmake.in b/cmake/bsplines-config.cmake.in
index 31605c5..bf7ad55 100644
--- a/cmake/bsplines-config.cmake.in
+++ b/cmake/bsplines-config.cmake.in
@@ -1,54 +1,54 @@
 #
 # @file bsplines-config.cmake.in
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 # - Config file for the BSPLINES package
 # It defines the target the following variables:
 #  FFLAGS               - Fortran compile flags
 #  BSPLINES_MODS        - include directories for bsplines modules
 #  BSPLINES_LIBS        - bsplines library
 #  BSPLINES_EXTRA_INCS  - additional include directories
 #  BSPLINES_EXTRA_LIBS  - additional libraries
 #  HAS_PARDISO          - BSPLINES built with PARDISO
 #  HAS_MUMPS            - BSPLINES built with MUMPS
 #  MPIEXEC              - MPI launcher
 #  MPIEXEC_NUMPROC_FLAG - Number of MPI processes flag
 
 # Compute paths
 get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH)
 get_filename_component(_prefix "${_dir}/../.." ABSOLUTE)
 
 # Import the targets
 include("${_prefix}/lib/cmake/bsplines-targets.cmake")
 
 # Report other information
 set(FFLAGS "@CMAKE_Fortran_FLAGS@")
 set(BSPLINES_MODS "${_prefix}/include")
 set(BSPLINES_LIBS fft bsplines pppack pputils2)
 set(BSPLINES_EXTRA_INCS "@EXTRA_INCS@")
 set(BSPLINES_EXTRA_LIBS "@EXTRA_LIBS@")
 set(HAS_PARDISO "@HAS_PARDISO@")
 set(HAS_MUMPS "@HAS_MUMPS@")
 set(MPIEXEC "@MPIEXEC@")
 set(MPIEXEC_NUMPROC_FLAG "@MPIEXEC_NUMPROC_FLAG@")
diff --git a/docs/manual/Makefile b/docs/manual/Makefile
index 206a081..f77ff93 100644
--- a/docs/manual/Makefile
+++ b/docs/manual/Makefile
@@ -1,59 +1,59 @@
 #
 # @file bsplines.tex
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 dvi:	bsplines.dvi
 pdf:	bsplines.pdf
 ps:	bsplines.ps
 
 .SUFFIXES:
 .SUFFIXES: .sgml .html .tex .dvi .pdf .ps .txt
 
 .tex.dvi:
 	latex $<
 	@while ( grep "Rerun to get cross-references" \
 	         ${<:tex=log} > /dev/null ); do \
 	         latex $<; \
 	done
 	latex $<
 
 .dvi.pdf:
 	dvipdf $<
 
 .dvi.ps:
 	dvips $<
 
 bsplines.dvi: bsplines.tex driv1.eps fit.eps
 
 solvers.dvi: solvers.tex
 
 dirichlet_2d.dvi: dirichlet_2d.tex
 
 using_bsplines.dvi: using_bsplines.tex
 
 clean:
 	rm -f *~ *.dvi *.log *.aux *.out *~ *.toc *.flc *.bbl *.blg
 
 distclean: clean
 	rm -f bsplines.ps
diff --git a/docs/manual/bsplines.tex b/docs/manual/bsplines.tex
index ecc06a9..9b21e88 100644
--- a/docs/manual/bsplines.tex
+++ b/docs/manual/bsplines.tex
@@ -1,1297 +1,1297 @@
 %
 % @file bsplines.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 % @author Stephan Brunner <stephan.brunner@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{linuxdoc-sgml}
 %\usepackage{a4wide}
 \usepackage{graphicx}
 \usepackage{hyperref}
 \usepackage{amsmath}
 %\usepackage{fancybox}
 %\usepackage[notref]{showkeys}
 
 \title{\tt BSPLINES Reference Guide}
 \author{Trach-Minh Tran, Stephan Brunner, Kurt Appert}
 \date{v0.3, February 2012}
 \abstract{Generalized splines of any order on irregular grids for
   interpolation and solving PDEs with FEM.}
 
 \begin{document}
 \maketitle
 \tableofcontents
 
 \section{Properties of Splines}
 In this section, several properties of splines will be shown in
 a more or less rigorous way. The aim is mainly to provide a
 minimum mathematical background for using the module \texttt{BSPLINES} for the
 \emph{interpolation} problem as well as the \emph{Finite Element Method}
 to solve PDEs.  More rigorous mathematical proofs can be found
 in the book by de Boor~\cite{deBoor}.
 
 \subsection{Recurrence Relation}
 We start by defining a finite interval $[a,b]$ subdivided into $N_x$
 intervals:
 \begin{equation}
   a=t_0 \le t_1 \le \ldots \le t_{N_x}=b.
 \end{equation}
 The sequence $t_i, i=0,\ldots,N_x$ can be irregularly spaced. The $j^{th}$
 spline of degree $p$ defined on this sequence of grid points (also called
 \textbf{knots}), is denoted by $\Lambda_{j}^{p}$ and can be constructed using
 the following recurrence relation. Starting with the \emph{constant} spline
 
 \begin{equation}
   \Lambda_i^0(x) =
   \begin{cases}
     1& \text{if $t_i \le x  < t_{i+1}$}, \\
     0& \text{otherwise}.
   \end{cases}
 \end{equation}
 the splines of degree $p>0$ for $t_i \le x  < t_{i+1}$ can be constructed from
 
 \begin{eqnarray}
   \Lambda_i^p &=& w_i^p\Lambda_i^{p-1} + (1-
                     w_{i+1}^p)\Lambda_{i+1}^{p-1}, \label{eq:recRel}\\
   w_{i}^{p} &=& \frac{x-t_i}{t_{i+p}-t_i}.
 \end{eqnarray}
 Thus the values of all \emph{non-zero}
 splines up to degree $p$ in the interval $[t_i,t_{i+1}]$ fit into
 the triangular array as shown in Fig.~\ref{fig:allSpl}. Starting from the
 first column with $\Lambda_i^0=1$, one can compute each of the $p+1$ entries
 in a subsequent column with Eq.~(\ref{eq:recRel}). Applying this procedure
 to generate splines on every intervals $[t_i,t_{i+1}], i=0,\ldots,N_{x}-1$
 would produce the sequence of $N_{x}+p$ splines of degree $p$:
 \( \Lambda_{-p}^{p},\ldots, \Lambda_{N_x-1}^{p}\).
 
 \subsection{Support and positivity}
  The linear spline
  \begin{equation*}
 \Lambda_i^1 = w_i^1\Lambda_i^0 + (1-w_{i+1}^1)\Lambda_{i+1}^{0}
    =\frac{x-t_i}{t_{i+1}-t_i}\Lambda_i^0 +
      \frac{t_{i+2}-x}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0
  \end{equation*}
 consists of 2 \emph{linear pieces} on $[t_i,t_{i+2}]$, forming a $C^0$
 function which breaks at $t_{i+1}$ and vanishes outside of this
 interval. Likewise, the quadratic spline
 \begin{eqnarray*}
 \Lambda_i^2 &=& w_i^2\Lambda_i^1 + (1-w_{i+1}^2)\Lambda_{i+1}^{1} \\
 &=& w^2_i w^1_i \Lambda_i^0 + [w^2_i(1-w^1_{i+1})+w^1_{i+1}(1-w^2_{i+1})]
 \Lambda_{i+1}^0 + (1-w^2_{i+1})(1-w^1_{i+2})\Lambda_{i+2}^0
 \end{eqnarray*}
 consists of 3 \emph{parabolic pieces} on $[t_i,t_{i+3}]$ that
 join to form a $C^1$ function which breaks at $t_{i+1}$ and $t_{i+2}$
 and vanishes outside of this interval. In general the spline of
 degree $p$ can be expressed as:
 \begin{equation}
 \Lambda^p_i = \sum_{r=0}^{p}\, b_{i+r}^p\Lambda_{i+r}^0
 \end{equation}
 where $b_{i+r}^p$ is a sum of products of $p$ linear functions, resulting
 in $p+1$ polynomials of degree $p$, joining to form
 a $C^{p-1}$ function which breaks at $t_i,\ldots,t_{i+p+1}$
 and vanishes outside of the \emph{support} $[t_i,t_{i+p+1}]$. From its construction,
 $\Lambda^p_i$ is clearly \emph{strictly positive} on the interior of $[t_i,t_{i+p+1}]$.
 \begin{equation}
 \Lambda^p_i (x) > 0, \qquad t_i<x<t_{i+p+1}.
 \end{equation}
 
 \begin{figure}
 \centering
 \( \begin{array}{cccccc}
      &                  &    &      &                        &                   \\
      &                  &    &      &                        &  0                \\
      &                  &    &      &  0                     &                \\
      &                  &    & \cdot&                        &  \Lambda^p_{i-p}  \\
      &                  & 0  &      &  \Lambda^{p-1}_{i-p+1} &                \\
      & 0                &    & \cdot&                        &  \Lambda^p_{i-p+1} \\
   0  &                  &\Lambda^{2}_{i-2}& &\Lambda^{p-1}_{i-p+2}  &  \\
      & \Lambda^{1}_{i-1}&    & \cdot&       &  \Lambda^p_{i-p+2} \\
  \Lambda^{0}_{i} &      &\Lambda^{2}_{i-1}&      & \cdot               &         \\
      & \Lambda^{1}_{i}  &    & \cdot&                 &  \cdot    \\
   0  &                  &\Lambda^{2}_{i}    &      & \Lambda^{p-1}_{i-1}  &     \\
      & 0                &    & \cdot&     &  \Lambda^p_{i-1}    \\
      &                  & 0  &      &  \Lambda^{p-1}_{i}                        &    \\
      &                  &    & \cdot&     &  \Lambda^p_{i}  \\
      &                  &    &      &                        &   \\
      &                  &    &      &  0                     &                   \\
      &                  &    &      &                        &  0
 \end{array} \)
 \caption{The array of all the splines of degree up to $p$ that are non-zero in
   $[t_i,t_{i+1}]$.}
 \label{fig:allSpl}
 \end{figure}
 
 \subsection{Sum of Splines}
 For $t_i\leq x < t_{i+1}$
 \begin{eqnarray*}
 \sum_j\,\Lambda_j^0 &=& \Lambda_i^0 = 1, \\
 \sum_j\,\Lambda_j^1 &=& \Lambda_{i-1}^1 + \Lambda_{i}^1= (1-w^1_i)\Lambda_i^0
 + w^1_i\Lambda_i^0 = 1.
 \end{eqnarray*}
 Thus assuming that for $p>1$:
 \[ \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1, \]
 or that the sum of the next to last column in Fig.~\ref{fig:allSpl} is $1$,
 we have, using the recurrence relation (\ref{eq:recRel})
 \begin{eqnarray*}
 \sum_{j=i-p}^{i}\,\Lambda_j^{p} &=& \sum_{j=i-p}^{i}\,
 \left( w^p_j\Lambda_j^{p-1} +(1-w^p_{j+1})\Lambda_{j+1}^{p-1} \right) \\
 &=& \sum_{j=i-p+1}^{i}\,w^p_j\Lambda_j^{p-1} +
     \sum_{j=i-p+1}^{i}\,(1-w^p_j)\Lambda_j^{p-1}  \\
 &=& \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1.
 \end{eqnarray*}
 
 \subsection{Derivative of Splines}
 The derivative of the splines of degree $p$ can be expressed in terms of the
 splines of degree $p-1$ by the following relation:
 \begin{equation}
   \label{derivative of splines}
   \frac{d}{dx}\Lambda_i^p =
   p\left(
   \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
   - \frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
   \right).
 \end{equation}
 A straightforward consequence of this relation is that the splines of order
 $p$ are $C^{p-1}$ continuous. The demonstration of Eq.(\ref{derivative of
 splines}) is done by induction. One starts with the case $p=1$:
 \begin{eqnarray*}
 \frac{d}{dx}\Lambda_i^1
 & = &
 \frac{d}{dx}\left[
 w_i^1\Lambda_i^0 + (1-w^1_{i+1})\Lambda_{i+1}^0
 \right]
 \\
 & = &
 \frac{d\,w_i^1}{dx}\Lambda_i^0 + \frac{d\,(1-w^1_{i+1})}{dx}\Lambda_{i+1}^0
 + w_i^1\frac{d\,\Lambda_i^0}{dx} + (1-w^1_{i+1})\frac{d\,\Lambda_{i+1}^0}{dx}
 \\
 & = &
 \frac{1}{t_{i+1}-t_i}\Lambda_i^0 - \frac{1}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0,
 \end{eqnarray*}
 having used Eq.(\ref{eq:recRel}) and $d\,\Lambda_i^0/dx = 0$. One then assumes
 Eq.(\ref{derivative of splines}) true for $p-1$ and demonstrates that it
 remains true for $p$. This is done as follows:
 \begin{eqnarray}
 \label{demo deriv. 1}
 \frac{d}{dx}\Lambda_i^p
 & = &
 \frac{d}{dx}\left[
 w_i^p\Lambda_i^{p-1} + (1-w^p_{i+1})\Lambda_{i+1}^{p-1}
 \right]
 \\
 \nonumber
 & = &
 \frac{d\,w_i^p}{dx}\Lambda_i^{p-1}
 + \frac{d\,(1-w^p_{i+1})}{dx}\Lambda_{i+1}^{p-1}
 + w_i^p\frac{d\,\Lambda_i^{p-1}}{dx}
 + (1-w^p_{i+1})\frac{d\,\Lambda_{i+1}^{p-1}}{dx}
 \\
 \nonumber
 & = &
 \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
 -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
 \\
 \label{demo deriv. 2}
 &&
 + w_i^p (p-1)\left(
   \frac{\Lambda_i^{p-2}}{t_{i+p-1}-t_i}
   - \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}}
 \right)
 + (1-w_{i+1}^p) (p-1)\left(
   \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}}
   - \frac{\Lambda_{i+2}^{p-2}}{t_{i+p+1}-t_{i+2}}
   \right)
 \end{eqnarray}
 having used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 1}), and the
 induction hypothesis to obtain Eq.(\ref{demo deriv. 2}). Now, rearranging the
 last two terms of Eq.(\ref{demo deriv. 2}), one easily obtains:
 \begin{eqnarray}
 \nonumber
 \frac{d}{dx}\Lambda_i^p
 & = &
 \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
 -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
 \\
 \nonumber
 &&
 +(p-1)\left[
 \frac{1}{t_{i+p}-t_i}
 \left(
 \frac{x-t_i}{t_{i+p-1}-t_i}\Lambda_i^{p-2}
 +\frac{t_{i+p}-x}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2}
 \right)
 \right.
 \\
 \nonumber
 &&
 \hspace{2.cm}
 \left.
 -\frac{1}{t_{i+p+1}-t_{i+1}}
 \left(
 \frac{x-t_{i+1}}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2}
 +\frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+2}}\Lambda_{i+2}^{p-2}
 \right)
 \right]
 \\
 \label{demo deriv. 3}
 & = &
 \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
 -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
 + (p-1)\left(
 \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
 -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
 \right)
 \\
 \nonumber
 & = &
 p\left(
 \frac{\Lambda_i^{p-1} }{t_{i+p}-t_i}
 -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
 \right)
 \end{eqnarray}
 having again used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 3}). This
 completes the demonstration of relation (\ref{derivative of splines}).
 
 
 \subsection{Integrals of Splines}
 
 With the proper normalization all splines of all degrees have unitary surface:
 \begin{equation}
   \label{integrals of splines}
   \frac{p+1}{t_{i+p+1}-t_i}\int \Lambda_i^p(x)dx =  1.
 \end{equation}
 This relation holds trivially for $p=0$ and $p=1$. A recursive proof of the
 general statement (\ref{integrals of splines}) starts assuming
 \begin{equation}
   \label{previousInt}
   \frac{p}{t_{i+p}-t_i}\int \Lambda_i^{p-1}(x)dx =  1
 \end{equation}
 to be true. Then using Eq.(\ref{derivative of splines}) multiplied by $x$ and integrating one obtains:
 \begin{equation}
 \nonumber
 \int x \frac{d}{dx}\Lambda_i^p dx = -\int \Lambda_i^p dx =
   p\int\left( \frac{x\Lambda_i^{p-1}}{t_{i+p}-t_i}
   -\frac{x\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
  \right)dx.
 \end{equation}
 Completing the fractions in the big parentheses in view of using
 Eq.(\ref{eq:recRel}) one has
 \begin{eqnarray}
 \nonumber
 \int \Lambda_i^p dx
   & = &
           -\, p\int \frac{x-t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx
           - p\int \frac{t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx \\
 \nonumber
   & &
           -\, p\int \frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx
           + p\int \frac{t_{i+p+1}}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx,
 \end{eqnarray}
 where the first and the third terms on the right correspond to $-p\int\Lambda_i^p dx $,
 Eq.(\ref{eq:recRel}), and can be combined with the left side to yield
 \begin{equation}
 \label{proofIntegrals}
 (1+p)\int\Lambda_i^p dx = t_{i+p+1}-t_i,
 \end{equation}
 where relation (\ref{previousInt}) has been used for the rest on the right hand side.
 This concludes the proof of Eq.(\ref{integrals of splines}).
 
 
 \subsection{Boundary Conditions}
 Applying the recurrence relation to generate \emph{all} the splines on
 the finite domain $[t_0,t_{N_x}]$ yields the $N_x+p$ splines
 of degree $p$:
 \begin{equation}
   \Lambda^p_{-p},\Lambda^p_{-p+1},\ldots, \Lambda^p_{N_x-1}. \label{eq:splSeq}
 \end{equation}
 Note that \emph{additional} knots beyond both ends of $[t_0,t_{N_x}]$ have to
 be defined to generate all these splines.
 
 \subsubsection{Periodic splines}
 The  extra knots are simply defined through periodicity.:
 \begin{eqnarray}
   t_{-\nu} &=& t_{N_x-\nu}-(b-a), \\
   t_{N_{x}+\nu} &=& t_{\nu}+(b-a), \qquad \nu=0,\ldots,p.
 \end{eqnarray}
 The $p+1$ leftmost splines in (\ref{eq:splSeq}) are thus identical to the rightmost splines:
 \begin{equation}
 \Lambda^p_{-\nu} = \Lambda^p_{N_x-\nu}, \qquad \nu=0,\ldots,p.
 \end{equation}
 
 \subsubsection{Non-periodic splines}
 The choice made in \texttt{BSPLINES} is simply:
 \begin{equation}
   t_{-p} = \cdots = t_{0} = a, \qquad b=t_{N_x}=\cdots=t_{N_x+p}.
 \end{equation}
 Thus in the first interval $[t_0,t_1]$, the first spline $\Lambda^p_{-p}$
 is constructed (refer to the first entry on each of the column of
 Fig.~\ref{fig:allSpl}, with $i=0$) as follow:
 \begin{eqnarray*}
  \Lambda^1_{-1} &=& (1-w^1_{0})\Lambda^0_{0} = \frac{t_{1}-x}{t_{1}-t_{0}}\Lambda^0_{0}\\
  \Lambda^2_{-2} &=& (1-w^2_{-1})\Lambda^1_{-1} =\frac{t_{1}-x}{t_{1}-t_{-1}}\Lambda^1_{-1}
  =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{2}\Lambda^0_{0}\\
 \cdot  & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\
  \Lambda^p_{-p} &=& (1-w^p_{-p+1})\Lambda^p_{-p+1} =\frac{t_{1}-x}{t_{1}-t_{-p+1}}\Lambda^{p-1}_{-p+1}
  =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{p}\Lambda^0_{0}
 \end{eqnarray*}
 
 
 In the same manner, the generation of the \emph{last} spline $\Lambda^p_{N_x-1}$ (last entry
 on each of the column of Fig.~\ref{fig:allSpl}, with $i=N_{x}-1$) yields:
 \begin{eqnarray*}
 \Lambda^1_{N_x-1} &=& w^1_{N_x-1}\Lambda^0_{N_x-1} =
    \frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\Lambda^0_{N_x-1} \\
 \Lambda^2_{N_x-1} &=& w^2_{N_x-1}\Lambda^1_{N_x-1} =
    \frac{x-t_{N_x-1}}{t_{N_x+1}-t_{N_x-1}}\Lambda^1_{N_x-1}
    = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{2}\Lambda^0_{N_x-1}\\
 \cdot  & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\
 \Lambda^p_{N_x-1} &=& w^p_{N_x-1}\Lambda^p_{N_x-1} =
    \frac{x-t_{N_x-1}}{t_{N_x+p-1}-t_{N_x-1}}\Lambda^1_{N_x-1}
    = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{p}\Lambda^0_{N_x-1}
 \end{eqnarray*}
 
 Since the sum of all splines is 1 and using the positivity of splines, all the
 non-periodic splines, except the first (last) spline should vanish at $x=a$ ($x=b$):
 \begin{equation}
   \Lambda^p_{r}(a) = \delta_{r,-p}, \qquad\Lambda^p_{r}(b) = \delta_{r,N_x-1}
 \end{equation}
 
 The spline derivatives at the boundaries $x=a$ and $x=b$ can be
 derived using Eq.(\ref{derivative of splines}) as follow. At $x=a$
 (interval $[t_0,t_1]$), by noting that only the spline
 $\Lambda^{p-1}_{-p+1}$ is non-zero at $x=a$ (see next to last column
 of Fig.{\ref{fig:allSpl}, with $i=0$), it is easy to see that
 there are only 2 non-zero derivatives
 given by
 \begin{equation}
   \begin{split}
   \frac{d}{dx}\Lambda^p_{-p}(a) =&
   -\frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p}} = -\frac{p}{t_1-t_0}, \\
   \frac{d}{dx}\Lambda^p_{-p+1}(a) =&
   \frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p+1}} = \frac{p}{t_1-t_0},
   \end{split}
 \end{equation}
 where we have used $t_0=t_{-1}=\ldots=t_{-p}=a$. Likewise, the 2
 non-zero derivatives of spline at the other boundary $x=b$ are
 \begin{equation}
   \begin{split}
   \frac{d}{dx}\Lambda^p_{N_x-2}(b) =&
   -\frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = -\frac{p}{t_{N_x}-t_{N_x-1}}, \\
   \frac{d}{dx}\Lambda^p_{N_x-1}(b) =&
   \frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = \frac{p}{t_{N_x}-t_{N_x-1}},
   \end{split}
 \end{equation}
 where we have used $t_{N_x}=t_{N_x+1}=\ldots=t_{N_x+p}=b$.
 
 \subsubsection{Spline expansion}
 In summary, the approximation of a function $f$ defined
 in the interval $[a,b]$ using a basis (\textbf{Is this obvious?) }of
 splines of degree $p$ associated with the sequence of knots
 $t_i, i=-p,\ldots,N_{x}+p$
 can be written as
 \begin{equation}
   f(x) = \sum_{j=-p}^{N_x-1}\, c_j\Lambda^p_j(x), \qquad
   \begin{array}{l}
     \mbox{support of $\Lambda^p_j$:}\quad [t_{j},t_{j+p+1}],\\
     t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i-p}(x),\ldots,
     \Lambda^p_{i}(x) \ge 0.
   \end{array}
 \end{equation}
 
 Note that the \emph{last} spline in the interval $[t_i,t_{i+1}]$, which can be written as
 \[ \Lambda^p_{i}(x)=w^p_i(x) \Lambda^{p-1}_{i}(x)=\ldots=w^p_i(x) w^{p-1}_i(x) \ldots
 w^{1}_i(x)\Lambda^{0}_{i}(x) \]
 \emph{vanishes at the knot} $x=t_i$. Thus at any position $x$, the sum
 involves $p+1$ terms except at the knots $t_i$ where there are only $p$ terms.
 
 It is sometimes more convenient to renumber the spline index $j$ so that it
 starts from $0$. With this new numbering, the spline expansion becomes
 \begin{equation}
   f(x) = \sum_{j=0}^{N_x+p-1}\, c_j\Lambda^p_j(x), \qquad
   \begin{array}{l}
     \mbox{support of $\Lambda^p_j$:}\quad [t_{j-p},t_{j+1}], \\
     t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i}(x),\ldots,
     \Lambda^p_{i+p}(x) \ge 0.
   \end{array}
  \label{eq:splExp}
 \end{equation}
 
 In the \emph{periodic} case, there are $N_{x}$ \emph{independent}
 spline coefficients since
 \begin{equation}
   c_{N_{x}+\nu} = c_{\nu}, \qquad \nu=0,\ldots,p-1. \label{eq:perSp}
 \end{equation}
 
 In the \emph{non-periodic} case,
 the first and the last spline coefficients $c_{0},\,c_{N_x+p-1}$ are
 respectively the values of $f$ at the end points $a$ and $b$.
 
 The basis functions for both non-periodic and periodic cubic splines
 ($p=3$) are shown in Fig~.\ref{fig:cubic_splines} where this new numbering is
 used.
 
 
 \begin{figure}[htbp]
   \centering
   \includegraphics[angle=0,width=\hsize]{driv1}
   \caption{The basis of non-periodic and periodic cubic splines. The periodic
    splines $\Lambda_{10}$, $\Lambda_{11}$, $\Lambda_{12}$ denote the same
    splines as $\Lambda_{0}$, $\Lambda_{1}$, $\Lambda_{2}$ respectively. }
   \label{fig:cubic_splines}
 \end{figure}
 
 \subsection{Spline Initialization with \texttt{SET\_SPLINE}}
 The initialization of a spline is performed by calling the routine
 \texttt{SET\_SPLINE}, passing the desired degree $p$ and the
 sequence of grid points (or knots) $t_j, j=0,\ldots,N_x$. If
 Gauss points on each of the intervals $[t_j,t_{j+1}]$ are needed, a non-zero
 value of \texttt{NGAUSS} should be specified. The other input
 argument is the \emph{optional} \texttt{LOGICAL} argument \texttt{PERIOD}
 to define the periodicity of the splines. By default it is \texttt{.FALSE.}.
 The routine returns the 1d spline \texttt{SP} which is of type
 \texttt{TYPE(spline1d)}:
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE set_spline(p, ngauss, grid, sp, period)
     INTEGER, INTENT(in) :: p, ngauss
     DOUBLE PRECISION, INTENT(in)  :: grid(:)
     LOGICAL, OPTIONAL, INTENT(in) :: period
     TYPE(spline1d), INTENT(out)   :: sp
     LOGICAL, OPTIONAL, INTENT(in) :: period
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 Besides the main characteristics of the spline (degree $p$ of splines,
 number of grid intervals, dimension of splines $N_x+p$, etc.)
 the following quantities will be determined and stored in \texttt{SP}:
 \begin{itemize}
  \item values and all the $p$ derivatives of the $p+1$ non-vanishing splines
   on each knots $t_j$. These quantities will be used to speed up computation
   of the spline expansion (\ref{eq:splExp}).
   \item integrals of splines \(I_i=\int\Lambda_i(x)\,dx\).
 \end{itemize}
 
 For a 2d spline
 \begin{equation}
   \Lambda^{p+q}_{ij}(x,y) = \Lambda^p_i(x)\Lambda^q_j(y),
 \end{equation}
 on a 2d structured mesh defined by the grid points
 \texttt{grid1(0:N1), grid2(0:N2)}, the same call as in the 1d case can be
 used, except that the scalars \texttt{p, ngauss, period} become 2 element arrays and
 the output \texttt{SP} is now of type \texttt{TYPE(spline2d)}:
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   INTEGER :: p(2), ngauss(2)
   LOGICAL, OPTIONAL :: period(2)
   DOUBLE PRECISION, dimension(:) :: grid1, grid2
   TYPE(spline2d) :: sp2d
 ...
   CALL set_spline(p, ngauss, grid1, grid2, sp2d, period)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 The derived type \texttt{spline2d} is a \emph{wrapper} of 2 \texttt{spline1d}
 objects which can be accessed through \texttt{sp2d\%sp1} and \texttt{sp2d\%sp2}.
 
 Once \texttt{SET\_SPLINE} is called, the routine \texttt{GET\_DIM} can be
 called to inquire the spline's essential characteristics such as dimension,
 number of intervals and degree, for both 1d and 2d splines: \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE get_dim(sp, dim, nx, nidbas)
     TYPE(spline1d), INTENT(in) :: sp
     INTEGER, INTENT(out) :: dim
     INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 Integral of function $\int^b_a\,f(x)dx$ is computed from its spline
 \texttt{sp} and splines coefficients in:
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   DOUBLE PRECISION FUNCTION fintg(sp, c)
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 For a 2d functions, the same function should be called with a 2d spline
 \texttt{sp} and 2d array $c$.
 
 Finally \texttt{DESTROY\_SP(sp)} should be called when a spline
 \texttt{sp} is not needed anymore to clean up memory space.
 
 
 \subsection{Generating Splines with \texttt{DEF\_BASFUN}}
 
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE def_basfun(xp, sp, fun, left)
     DOUBLE PRECISION, INTENT(in) :: xp
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(out) :: fun(:,:)
     INTEGER, OPTIONAL, INTENT(out) :: left
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 This routine computes, for a given point $\mbox{xp}\in [t_0,t_{N_x}]$, the value
 and optionally the $m$ derivatives of the $p+1$ splines \texttt{sp} which were previously
 defined and returns them in \texttt{fun(1:p+1,1:m+1)} with $m\leq p$. The maximum
 number of computed derivatives $m$ is determined by the size of the second dimension
 of the array \texttt{fun}. The subroutine will return the optional integer
 \texttt{left} defined such that:
 \[
 t_{\mbox{left}} \leq xp < t_{\mbox{left+1}}, \qquad 0\leq \mbox{left} \leq N_{x.-1}.
 \]
 
 \subsection{Example 1: Values and derivatives of all splines}
 In this example, we first initialize a cubic spline with
 the knot sequence $t_0,\ldots,t_{N_x}$ with \texttt{SET\_SPLINE}
 and then call \texttt{DEF\_BASFUN} to compute its values, first and second
 derivatives on the mesh points \texttt{xp(1:npts)}.
 
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   USE BSPLINES
   INTEGER, PARAMETER :: nx=10, npts=100
   DOUBLE PRECISION :: t(0:nx), xp(npts)
   DOUBLE PRECISION, ALLOCATABLE :: fxp0(:,:), fxp1(:,:), fxp2(:,:)
   DOUBLE PRECISION :: fun(4,3)   ! 4 cubic splines at a given xp
                                  ! plus first and second derivatives.
   INTEGER :: i, dim, left
   TYPE(spline1d) :: sp
 !
 !   Define t(0:nx), xp(npts)
 !
   CALL set_spline(3, 0, t, sp, period=.FALSE.)
   CALL get_dim(sp, dim)
   ALLOCATE(fxp0(npts,0:dim-1), fxp1(npts,0:dim-1), fxp2(npts,0:dim-1)
   fxp0 = 0.0
   fxp1 = 0.0
   fxp2 = 0.0
   DO i=1,npts
      CALL def_basfun(xp(i), sp,  fun, left=left)
      fxp0(i, left:left+3) = fun(1:4, 1)   ! Value
      fxp1(i, left:left+3) = fun(1:4, 2)   ! 1st derivative
      fxp2(i, left:left+3) = fun(1:4, 3)   ! 2nd derivative
   END DO
   DEALLOCATE(fxp0, fxp1, fxp2)
   CALL destroy_sp(sp)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 This code fragment will store \texttt{dim=nx+3=13} splines
 and theirs first 2 derivatives in \texttt{fxp0}, \texttt{fxp1} and
 \texttt{fxp2}. Change the \texttt{period} to \texttt{.TRUE.} to obtain
 \emph{periodic} splines.
 
 \section{Spline Interpolation}
 Given the interval $[a,b]$ discretized into $\{x_k,\,k=0,\ldots,N_g\}$ with
 $x_0=a$ and $x_{N_g}=b$, the problem of interpolating $f(x), x\in [a,b]$ with
 splines of degree $p$ is to solve the following equations
 for the spline coefficients $c_i$:
 \begin{equation}
   \sum_{i=0}^{N_x+p-1}\,c_i\Lambda^p_{i}(x_k) = f(x_k),\quad k=0,\ldots,N_g.
   \label{eq:intEq}
 \end{equation}
 
 The sequence of knots $t_0,\ldots,t_{N_x}$ defines completely the splines
 $\Lambda^p_i$ and its choice will be described in the following section.
 
 \subsection{Choice of knots}
 If  Eqs.~(\ref{eq:intEq}) are the only conditions for our interpolation
 problem,  the number of equations should match the number
 of unknowns $c_i$. The number of knot intervals $N_x$ hence has to verify
 \begin{equation}
 N_x = N_g-p+1.\label{eq:knotNum}
 \end{equation}
 
 For the \emph{periodic} case, taking into account the $p$ periodic spline conditions
 (\ref{eq:perSp}) on
 $c_i$ and $f(a)=f(b)$, this condition reduces to:
 \begin{equation}
 N_x = N_g.
 \end{equation}
 
 For \emph{odd} values of the spline degree $p$, the knots $t_i$ could be placed on
 the \emph{interpolation sites} $x_k$ while when $p$ is even, $t_i$ should not
 be on $x_k$ to avoid a \emph{badly conditioned} linear system when solving
 Eq.~(\ref{eq:intEq}). This leads to the following choice for $t_i$ in
 \texttt{BSPLINES}:
 
 \subsubsection{Periodic splines}
 The number of knots $N_x+1$ is \emph{equal} to the number of interpolation points
 $N_g+1$ with
 \begin{equation}
   t_i =
   \begin{cases}
     x_i & \text{$p$ odd} \\
     (x_{i-1}+x_i)/2 & \text{$p$ even}
   \end{cases}
   ,\qquad i=0,\ldots,N_x
 \end{equation}
 
 \subsubsection{Non-Periodic splines}
 In order to satisfy the equality (\ref{eq:knotNum}), first, the 2 end points
 are retained as knots:
 \begin{equation}
 t_0=x_0, \qquad t_{N_x} = x_{N_g}.
 \end{equation}
 
 For even $p$, the first $p/2$ interpolation intervals are \emph{skipped}:
 \begin{equation}
 t_i = (x_{i+p/2-1} + x_{i+p/2})/2, \qquad i=1,\ldots,N_x-1, \label{eq:evenKnots}
 \end{equation}
 
 while for odd $p$, $(p-1)/2$ interpolation points are \emph{skipped}:
 \begin{equation}
   t_i = x_{i+(p-1)/2 }, \qquad i=1,\ldots,N_x-1.  \label{eq:oddKnots}
 \end{equation}
 
 Instead of skipping grid points, an alternative would be to supplement the
 system of equations (\ref{eq:intEq}) with conditions on derivatives of $f(x)$
 at one or both ends of $[a,b]$. This type of boundary conditions is not
 implemented in the present version of the \texttt{BSPLINES} module.
 
 \subsection{The collocation matrix}
 The \emph{collocation matrix} $\Lambda^p_i(x_k)$ of the interpolation problem
 (\ref{eq:intEq}) is a square matrix.  Each row has at most $p+1$ non-zero
 terms. Let us consider separately the non-periodic and the periodic cases.
 
 \subsubsection{The non-periodic case}
 \paragraph{Even spline degree}
 
 From (\ref{eq:evenKnots}), there are $p/2+1$ interpolation points \(x_{0}, \ldots, x_{p/2}\)
 in the first knot interval $[t_0,t_1)$. Since there are at most $p+1$ non-zero
 splines for any points in each interval (except for $x_0$ where $\Lambda_i(x_0)=\delta_{i,0}$,
 the collocation matrix starts as:
 
 \begin{equation}
   \left(\begin{array}{llllll}
    \Lambda_0(x_{0})  & 0                  &\cdots &  \cdots            & \cdots              &\cdots \\
    \Lambda_0(x_{1})  &\Lambda_1(x_{1})    &\cdots &\Lambda_p(x_{1})    & 0                   &\cdots \\
    \vdots            &\vdots              &\cdots &\vdots              & 0                   &\cdots \\
    \Lambda_0(x_{p/2})&\Lambda_1(x_{p/2})  &\cdots &\Lambda_p(x_{p/2})  & 0                   &\cdots \\
    0                 &\Lambda_1(x_{p/2+1})&\cdots &\Lambda_p(x_{p/2+1})& \Lambda_{p+1}(x_{p/2+1})&0 \\
    0                 &\ddots              &\ddots &\ddots              & \ddots              &\ddots
   \end{array}\right)
 \end{equation}
 
 The number of \emph{upper-diagonals} (non including  the diagonal) is
 obviously determined by the second row of the matrix above, which yields $p-1$.
 Since the knot placement is identical for both ends of the interpolation mesh,
 the matrix
 $\Lambda_i(x_k)$ is \emph{banded} with half-bandwidths
 \begin{equation}
 kl=ku=p-1
 \end{equation}
 
 \paragraph{Odd spline degree}
 Applying the same procedure, it is straightforward to show for $p$ odd and
 from (\ref{eq:oddKnots}), that $x_0,\ldots.x_{(p-1)/2}$ are located in the
 first knot interval $[t_0,t_1)$ and that the matrix has again the same
 half-bandwidths as in the even $p$ case.
 
 The  resulting interpolation problem can then be solved with
 the usual \emph{banded matrix factorization} followed by a \emph{back-solve}
 phase.
 
 \subsubsection{The periodic case}
 Let consider the matrix for $p=3$ and $N_x=10$ (see lower figure of
 Fig.~(\ref{fig:cubic_splines}):
 \begin{equation}
   \left(\begin{array}{llllll}
   \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & \Lambda_2(x_{0}) & 0                & \cdots           &      \\
   0                & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & \Lambda_3(x_{1}) & 0                &\cdots \\
   \vdots           & \ddots           & \ddots           & \ddots           & \ddots           &\ddots \\
   0                & \cdots           & 0                & \Lambda_7(x_{7}) & \Lambda_8(x_{7}) & \Lambda_9(x_{7}) \\
   \Lambda_0(x_{8}) & 0                & 0                & \cdots           &\Lambda_8(x_{8})  & \Lambda_9(x_{8}) \\
   \Lambda_0(x_{9}) &\Lambda_1(x_{9})  & 0                & 0                & \cdots           & \Lambda_9(x_{9})
   \end{array}\right) \label{eq:perMat}
 \end{equation}
 The matrix is ``almost triangular'' (except for the last 2 rows) and is not
 \emph{diagonally dominant}! A more satisfactory (and symmetric in shape) matrix is
 however obtained by simply renumbering the splines such that the sequence
 starts with $-\lfloor p/2 \rfloor$ instead of $0$. This renumbered splines
 are  shown in Fig.~\ref{fig:fitSpl} for the cubic
 and quadratic periodic splines. With this renumbering, the matrix
 (\ref{eq:perMat})  has a more symmetric shape and is diagonally dominant:
 \begin{equation}
   \left(\begin{array}{lllll}
   \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & 0                & \cdots           & \Lambda_9(x_{0}) \\
   \Lambda_0(x_{1}) & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & 0                &\cdots \\
   \vdots           & \ddots           & \ddots           & \ddots           &\ddots \\
   0                & \cdots           & \Lambda_7(x_{8}) & \Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\
   \Lambda_0(x_{9}) & 0                & 0                & \Lambda_8(x_{9}) & \Lambda_9(x_{9})
   \end{array}\right) \label{eq:perMatnew}
 \end{equation}
 
 In general, for arbitrary $p$ (even and odd values), the collocation matrix
 $A=\Lambda_j(x_i)$ can be written as
 
 \begin{equation}
   A = B + UV^T
 \end{equation}
 
 where $B$ is a banded matrix with half-bandwidths $kl=ku=b=\lfloor p/2\rfloor$ and
 rank $N_x$. $U$ and $V$ are $N_x\times 2b$ sparse matrices:
 
 \begin{equation}
   U = \left(
   \begin{matrix}
     I & 0 \\
     0 & 0 \\
     0 & I
   \end{matrix}\right), \qquad
   V = \left(
   \begin{matrix}
     0 & D^T \\
     0 & 0 \\
     C^T & 0
   \end{matrix}\right), \qquad
   V^T = \left(
   \begin{matrix}
     0   & 0 & C \\
     D & 0 & 0
   \end{matrix}\right), \qquad
 \end{equation}
 where $C$, $D$ are the $b\times b$ \emph{off-band} sub-matrices and $I$, the
 identity matrix. In the cubic spline example considered above, the
 \emph{off-band} matrices are simply $1\times 1$ matrices with
 $C=\Lambda_9(x_0)$ and $D=\Lambda_0(x_9)$.
 
 The inverse of $A$ can be deduced from the \emph{Sherman-Morrison-Woodbury formula} \cite{Golub}:
 
 \begin{eqnarray*}
   A^{-1} &=& B^{-1} - B^{-1}U(1+V^{T}B^{-1}U)^{-1}V^{T}B^{-1} \\
          &=& B^{-1} - ZW^{T}B^{-1},
 \end{eqnarray*}
 where
 \begin{eqnarray*}
   Z    &=&  B^{-1}U, \\
   H    &=& 1+V^{T}B^{-1}U \\
   W^T  &=& H^{-1}V^{T}.
 \end{eqnarray*}
 
 The solution of the interpolation problem $Ax=f$ can then be reduced to a
 \emph{factorization} and a \emph{back-solve} phase:
 
 \begin{enumerate}
 \item Factorization
   \begin{enumerate}
   \item Factor: \( B \longleftarrow L_BU_B \)
   \item Solve:  \( (L_BU_B)Z = U, \quad U\longleftarrow Z \)
   \item Compute: \( H = 1+V^{T}Z \)
   \item Factor:  \( H=L_HU_H \)
   \item Solve:   \( (L_HU_H)W^{T} = V^{T}, \quad V^{T}\longleftarrow W^{T} \)
   \end{enumerate}
 \item Back-solve
   \begin{enumerate}
   \item Solve: \( (L_BU_B)y = f \)
   \item Compute: \( t = W^{T}y \)
   \item Compute: \( x = y - Zt \)
   \end{enumerate}
 \end{enumerate}
 
 At the end of the factorization, only the (updated) matrices $B$, $U$ and
 $V^{T}$, required in the back-solve phase, need to be saved.
 Note that we avoid to store the product
 $ZW^T$ because it is a \emph{big} $N_x\times N_x$ matrix.
 
 After the \emph{back-solve} step, the solution $x$ is \emph{shifted back} (by
 $\lfloor p/2\rfloor$) and the appropriate periodicity condition is applied to
 obtain the spline coefficients $c_j,\, j=0,\ldots,N_x+p-1$, as defined in
 (\ref{eq:splExp}).
 
 \begin{figure}[htbp]
   \centering
   \includegraphics[angle=0,width=\hsize]{fit}
   \caption{The periodic cubic and quadratic splines used
    for interpolation. The spline knots are indicated by \emph{blue full circles} and
    the interpolation points, by \emph{dashed vertical lines} }
   \label{fig:fitSpl}
 \end{figure}
 
 \subsection{\texttt{PP} representation}
 The computation of $f(x)$ using directly the spline expansion Eq.~(\ref{eq:splExp}) can
 be costly, because of the evaluation of the splines $\Lambda^p_j(x)$,
 especially when interpolating on large number of points. Expanding $f(x)$,
 using truncated Taylor series in each interval $[t_\mu,t_{\mu+1}]$, we obtain
 the following \emph{Piecewise Polynomial Function} representation (or
 \texttt{ppform}) of $f(x)$:
 \begin{equation}
   f(x) = \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k, \quad t_\mu\leq x<t_{\mu+1},
   \label{eq:ppRep}
 \end{equation}
 
 where
 
 \begin{equation}
   \Pi_{k\mu} = \frac{1}{k!} \frac{d^k}{dx^k} f(t_\mu)
 = \frac{1}{k!} \sum_j\,c_j\frac{d^k}{dx^k} \Lambda_j(t_\mu)
 = \sum_j\,c_j\alpha_{j\mu k}.
 \label{eq:ppCoef}
 \end{equation}
 
 Note that
 \begin{equation}
   \alpha_{j\mu k} = \frac{1}{k!} \frac{d^k}{dx^k} \Lambda_j(t_\mu)
 \label{eq:derSpl}
 \end{equation}
 
 depend only on the spline specifications. They are \emph{pre-calculated}
 in the spline setup routine \texttt{SET\_SPLINE} and stored in the
 3d arrays \texttt{sp\%val0}. The \texttt{PP} coefficients
 $\Pi_{k\mu}$ can be computed once the spline coefficients $c_j$ are available,
 using (\ref{eq:ppCoef}).
 Then the interpolated function values together with the $p$ derivatives
 can be calculated \emph{efficiently} using the power series.
 
 \begin{eqnarray*}
    f(x) &=& \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k \\
    f'(x) &=& \sum^p_{k=1}\, k\,\Pi_{k\mu}(x-t_\mu)^{k-1} \\
    f''(x) &=& \sum^p_{k=2}\, k(k-1)\,\Pi_{k\mu}(x-t_\mu)^{k-2} \\
     \vdots
 \end{eqnarray*}
 
 These 2 steps are performed in \texttt{GRIDVAL}. Note that the first step
 (computation of $\Pi_{k\mu}$ from $c_j$) can be skipped for subsequent calls
 to \texttt{GRIDVAL} with the same function $f$, for example to compute $f$ or
 its derivatives at any others points $x$.
 
 \subsection{Example 2: Cubic spline interpolation}
 Given a function $f$ with its grid values \texttt{f(1:nx)}, the
 following example determines the spline coefficients \texttt{c(1:dim)}. Using these
 coefficients, the interpolated $f$ and derivative $f'$ are then computed on the mesh points
 \texttt{xp(1:npts)}. Note that the second call to \texttt{GRIDVAL} does not
 include the spline coefficients \texttt{c} to signal that the previously calculated
 \texttt{PP} coefficients will be reused.
 
 
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   USE BSPLINES
   INTEGER, PARAMETER :: nx=10, npts=100
   DOUBLE PRECISION :: x(0:nx), f(0:nx), xp(npts), fp0(npts), fp1(npts)
   DOUBLE PRECISION, ALLOCATABLE :: c(:)
   INTEGER :: dim
   TYPE(spline1d) :: sp
 !
 !   Define x and f
 !
   CALL set_splcoef(3, x, sp, period=.FALSE.)
   CALL get_dim(sp, dim)
   ALLOCATE(c(dim))
   CALL get_splcoef(sp, f, c)   ! compute spline coefs c(1:dim)
 !
 !   Compute interpolated f and f' on xp(npts)
 !
   CALL gridval(sp, xp, fp0, 0, c)
   CALL gridval(sp, xp, fp1, 1)
 !
   DEALLOCATE(c)
   CALL destroy_sp(sp)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 The description of each of the routines called in the example above is briefly
 given below:
 
 \begin{description}
 \item[\texttt{SET\_SPLCOEF}] Determines the spline knots, sets up the splines
   with \texttt{SET\_SPLINE}, assembles the collocation matrix
   and performs its \emph{factorization}.
 \item[\texttt{GET\_SPLCOEF}] Computes the spline \emph{coefficients} $c$ from the input
   grid values of function $f$ (\emph{back-solve phase}), using the factorized matrix.
 \item[\texttt{GRIDVAL}] Compute the \texttt{PP} coefficients using
   (\ref{eq:ppCoef}) \emph{if $c$ is provided}, locates the interval containing
   the given point $x$ and computes interpolated function values or derivatives
   using the \texttt{PP} representation (\ref{eq:ppCoef}).
 \end{description}
 
 
 \subsection{2d interpolation}
 Consider the spline interpolation on the plane $(x,y)$, using a tensor
 product of splines defined as follow
 \begin{equation}
  \Lambda^{p, q}_{ij}(x,y)=\Lambda^p_i(x)\Lambda^q_j(y), \qquad
  \begin{aligned}[c]
   i&=1,\ldots,d_1=N_1+p,\\
   j&=1,\ldots,d_2=N_2+q,
  \end{aligned}
 \end{equation}
 
 where $(p,q)$ are the spline degrees , $(N_1,N_2)$, the number of knot
 intervals in each direction:
 
 \[ t_0 \leq x < t_{N_1}, \quad s_0 \leq y < s_{N_2}.\]
 
 \subsubsection{Spline coefficients (\texttt{GET\_SPLCOEF})}
 
 The 2d version of (\ref{eq:intEq}) can be written as:
 
 \begin{equation}
   \begin{split}
   \sum_{ij}\,c_{ij}\Lambda^p_{i}(x_\mu)\Lambda^q_{j}(y_\nu) &= f(x_\mu,y_\nu)\\
     &=f_{\mu\nu}\\
   \end{split}
 \qquad
  \begin{aligned}[c]
   \mu&=0,\ldots,N_{g1}\\
   \nu&=0,\ldots,N_{g2}
  \end{aligned}
 \end{equation}
 
 where $(x_\mu,y_\nu)$ are the \emph{interpolation sites} on the $(x,y)$ plane.
 These equations can be rearranged into
 \begin{eqnarray}
   \sum_i\,\bar{c}_{i\nu}\Lambda_i(x_\mu) &=& f_{\mu\nu},\\
   \sum_j\,c_{ij}\Lambda_j(y_\nu)         &=& \bar{c}_{i\nu}.
 \end{eqnarray}
 
 Such a 2 step procedure is implemented, using the 1d version
 of \texttt{GET\_SPLCOEF} in the 2d version of
 \texttt{GET\_SPLCOEF} by the following code fragment:
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
     TYPE(spline2d) :: sp
     DOUBLE PRECISION :: ctr(SIZE(c,2), SIZE(c,1))
     CALL get_splcoefn(sp%sp1, f, c)
     CALL get_splcoefn(sp%sp2, TRANSPOSE(c), ctr)
     c = TRANSPOSE(ctr)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 \subsubsection{\texttt{PP} representation (\texttt{GRIDVAL})}
 Let us start with the \emph{spline representation}, for $f(x,y)$ with
 $t_\mu\leq x < t_{\mu+1}$ and $s_\nu \leq y < s_{\nu+1}$:
 \begin{equation}
   f(x,y) = \sum_{j=1}^{d_2}\left( \sum_{i=1}^{d_1}\,c_{ij}\Lambda^p_i(x)\right)\Lambda^q_j(y).
 \end{equation}
 
 Applying successively (\ref{eq:ppRep}) to the $x$ and $y$ functional dependency
 yields the following \texttt{PP} representation for $f(x,y)$:
 
 \begin{equation}
   f(x,y) = \sum^q_{k'=0}\left(\sum^p_{k=0}\,\Pi_{k\mu k'\nu}(x-t_\mu)^k\right)(y-s_\nu)^{k'}
   \label{eq:pp2Rep}
 \end{equation}
 
 The 2d \texttt{PP} coefficient is the \emph{tensor product} of 2 1d
 \texttt{PP} coefficients:
 
 \begin{equation}
   \Pi_{k\mu k'\nu} = \sum_{l'=1}^{d_2}\left(\sum_{l=1}^{d_1}\,c_{ll'}\,\alpha_{l\mu k}\right)\alpha'_{l'\nu k'}.
 \label{eq:pp2Coef}
 \end{equation}
 
 where $\alpha$ and $\alpha'$ are respectively the derivatives (\ref{eq:derSpl}) of all splines
 in $x$ and $y$ direction. All the derivatives of $f$ can be deduced straightforwardly
 from the \texttt{PP} representation (\ref{eq:pp2Rep}).
 
 In the present version, the 2d \texttt{GRIDVAL} can be called, either for (1) points
 on a 2d structured mesh: \texttt{XP(1:NPX), YP(1:NPY)} and returns the array
 \texttt{FP(1:NPX,1:NPY)}, or (2) with a 1d sequence of points
 \texttt{XP(1:NP), YP(1:NP)} and returns the 1d array \texttt{FP(1:NP)}.
 
 
 \section{Finite Elements using Splines}
 \subsection{The weak form}
 \subsection{The matrix assembly}
 \subsection{The boundary conditions}
 \subsubsection{Dirichlet condition}
 Dirichlet BC can be simply applied by imposing the conditions on the
 spline coefficients and the boundary point.
 For the BC $u(x=0)=u_{1} = c$ for example,
 the discrete equations can be expressed as:
 
 \begin{equation}
      \left(\begin{matrix}
        1 & 0 & \cdots \\
        A_{21} & A_{22} & \cdots \\
        \vdots & \vdots & \ddots \\
      \end{matrix}\right)
      \left(\begin{matrix}
        u_{1} \\
        u_{2} \\
        \vdots\\
        u_{N} \\
      \end{matrix}\right) =
      \left(\begin{matrix}
        c \\
        f_{2} \\
        \vdots\\
        f_{N} \\
      \end{matrix}\right)
 \end{equation}
 A more appropriate transformed system which
 preserves any symmetry of the original system is:
 
 \begin{equation}
      \left(\begin{matrix}
        1 & 0 & \cdots \\
        0 & A_{22} & \cdots \\
        \vdots & \vdots & \ddots \\
      \end{matrix}\right)
      \left(\begin{matrix}
        u_{1} \\
        u_{2} \\
        \vdots\\
        u_{N} \\
     \end{matrix}\right) =
      \left(\begin{matrix}
        c \\
        f_{2} -cA_{21}\\
        \vdots\\
        f_{N} -cA_{N1}\\
      \end{matrix}\right)
 \end{equation}
 
 
 \subsubsection{Unicity on the axis}
 Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$  , and
 their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by
 
 \begin{equation} \begin{array}{ccc}
      u_1-u_N = \hat u_1  &  &   u_1 = \hat u_1 + \hat u_N   \\
      u_2-u_N = \hat u_2  &  &   u_2 = \hat u_2 + \hat u_N   \\
      \vdots        & \Longrightarrow & \vdots               \\
      u_{N-1}-u_N = \hat u_{N-1}  &  &   u_{N-1} = \hat u_{1-1} + \hat u_N   \\
      u_N = \hat u_N      &  &   u_N = \hat u_N,
     \end{array}  \label{eq:unicity1} \end{equation}
 the unicity condition can be specified by simply imposing
 
 \begin{equation}
   \hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
 \end{equation}
 From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined
 as
 
 \begin{equation}
   \mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
      \left(\begin{matrix}
         1 & 0 & \dots & 0 & 1 \\
         0 & 1 & \dots & 0 & 1 \\
           &   & \ddots&   & \vdots \\
         0 & 0 & \dots & 1 & 1 \\
         0 & 0 & \dots & 0 & 1
      \end{matrix}\right), \quad \mathbf{U^{T}} =
      \left(\begin{matrix}
         1 & 0 & \dots & 0 & 0 \\
         0 & 1 & \dots & 0 & 0 \\
           &   & \ddots&   & \vdots \\
         0 & 0 & \dots & 1 & 0 \\
         1 & 1 & \dots & 1 & 1
      \end{matrix}\right).
 \end{equation}
 
 
 \paragraph{Matrix product \( \mathbf{A\cdot U}\)}
 \begin{equation}
 \mathbf{ A\cdot U} =
      \left(\begin{array}{lllll}
         A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  \sum_{j} A_{1,j}  \\
         A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  \sum_{j} A_{2,j}  \\
           &   & \ddots&   & \vdots \\
         A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} &  \sum_{j}A_{N-1,j}  \\
         A_{N,1} & A_{N,2}  & \dots & A_{N,N-1} &  \sum_{j}A_{N,j}
      \end{array}\right).
 \end{equation}
 Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the
 \emph{the sum of each row on the last column}.
 
 \paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)}
 \begin{equation}
 \mathbf{ U^T \cdot A} =
      \left(\begin{array}{lllll}
         A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  A_{1,N}  \\
         A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  A_{2,N}  \\
           &   & \ddots&   & \vdots \\
         A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} & A_{N-1,N}  \\
         \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
         \sum_{i}A_{i,N}
      \end{array}\right).
 \end{equation}
 Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the
 \emph{the sum of each column  on the last row}.
 
 \paragraph{Product \( \mathbf{\hat U \cdot b}\)}
 \begin{equation}
 \mathbf{\hat b} = \mathbf{U^T\cdot b} =
      \left(\begin{array}{l}
         b_1 \\
         b_2 \\
         \vdots \\
         b_{N-1} \\
         \sum_{i} b_{i}
      \end{array}\right),
 \end{equation}
 
 \paragraph{Transformation of the original matrix equation}
 The full original linear system, obtained from the discretization of the
 2D \(r,\theta\) polar coordinates can be written as:
 
 \begin{equation}
      \left(\begin{array}{ll}
        \mathbf{A} & \mathbf{B} \\
        \mathbf{C} & \mathbf{D}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{u} \\
        \mathbf{v}
      \end{array}\right) =
      \left(\begin{array}{l}
        \mathbf{b} \\
        \mathbf{c}
      \end{array}\right), \label{eq:orig_matrix_eq}
 \end{equation}
 where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and
 the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can
 thus be written as
 
 \begin{equation*}
      \left(\begin{array}{ll}
        \mathbf{U^T} & 0 \\
        0            & \mathbf{I}
      \end{array}\right)
      \left(\begin{array}{ll}
        \mathbf{A} & \mathbf{B} \\
        \mathbf{C} & \mathbf{D}
      \end{array}\right)
      \left(\begin{array}{ll}
        \mathbf{U} & 0 \\
        0            & \mathbf{I}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{\hat u} \\
        \mathbf{v}
      \end{array}\right) =
      \left(\begin{array}{ll}
        \mathbf{U^T} &0  \\
        0            & \mathbf{I}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{b} \\
        \mathbf{c}
      \end{array}\right),
 \end{equation*}
 
 \begin{equation}
    \Longrightarrow
      \left(\begin{array}{cc}
        \mathbf{U^TAU} & \mathbf{U^TB} \\
        \mathbf{CU} & \mathbf{D}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{\hat u} \\
        \mathbf{v}
      \end{array}\right) =
      \left(\begin{array}{c}
        \mathbf{U^Tb} \\
        \mathbf{c}
      \end{array}\right),
 \end{equation}
 Notice that the transformation preserves any symmetry existing in the original system
 (\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
 only the modified elements are shown and the sum is only over the first \(N\)
 points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements.
 
 \begin{equation}
      \left(\begin{array}{lllllll}
         \times & \times & \times & \times &  \sum_{j} A_{1,j}  & \times & \times \\
         \times & \times & \times & \times &  \sum_{j} A_{2,j}  & \times & \times \\
         \times & \times & \times & \times &  \vdots            & \times & \times \\
         \times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
          \sum_{i}A_{i,1} & \sum_{i}A_{i,2}  & \dots & \sum_{i}A_{i,N-1} &
          \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
         \times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
         \times & \times & \times & \times &  \vdots            & \times & \times
      \end{array}\right)
 \end{equation}
 Only the \(N^{th}\) column and the  \(N^{th}\) row are affected by the transformation.
 Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
 reads:
 
 \begin{equation}
      \left(\begin{array}{lllllll}
         1 & 0 & \dots & 0 & 0 & 0 & 0 \\
         0 & 1 & \dots  & 0 & 0 & 0 & 0 \\
         0 & 0 & \ddots & 0 &  \vdots            & 0 & 0 \\
         0 & 0 & \dots  & 1 & 0 & 0 & 0 \\
         0 & 0 & \dots  & 0 & \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
         0 & 0 & \dots  & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
         0 & 0 & \dots  & 0 &  \vdots            & \times & \times
      \end{array}\right)
      \left(\begin{array}{l}
         \hat u_1   \\
         \hat u_2   \\
         \vdots\\
         \hat u_{N-1}\\
         \hat u_{N} \\
         u_{N+1} \\
         \vdots
      \end{array}\right) =
      \left(\begin{array}{l}
         0   \\
         0   \\
         \vdots\\
         0   \\
         \sum_{i} b_{i} \\
         b_{N+1} \\
         \vdots
      \end{array}\right).
 \end{equation}
 
 
 \begin{thebibliography}{99}
 \bibitem{deBoor} C. de Boor, \emph{A Practical Guide to Splines}, Applied
   Mathematical Sciences, vol.~27 (Springer, NY, 2001).
 \bibitem{Golub} G.H. Golub, C.F. Van Loan, \emph{Matrix Computation, 3rd
   Edition}, p.5 (The John Hopkins University Press, 1996).
 \end{thebibliography}
 
 \end{document}
diff --git a/docs/manual/neuman.tex b/docs/manual/neuman.tex
index 34a8550..71ee2a6 100644
--- a/docs/manual/neuman.tex
+++ b/docs/manual/neuman.tex
@@ -1,141 +1,141 @@
 %
 % @file neuman.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{amsmath}
 \title{\tt Some Notes on Boundary Conditions}
 \author{Trach-Minh Tran}
 \date{March 2012}
 
 \begin{document}
 \maketitle
 
 \section{Neumann BC as an {essential} BC}
 The original equation:
 \begin{equation}
   \mathbf{A \cdot u} = \mathbf{b}
 \end{equation}
 with the Neumann BC (1D case):
 \begin{equation}
   \alpha u_1 + \beta u_2 = c.
 \end{equation}
 From Eq.(20) of \cite{BSPLINES}:
 \begin{equation}
   \beta = -\alpha =\frac{p}{\Delta_1}
 \end{equation}
 where $p$ is the degree of spline and $\Delta_1$ is the lenght of the
 first insterval.
 
 Transformation $(u_1, \ldots, u_n) \Rightarrow (\hat u_1, \ldots, \hat
 u_n)$ defined by
 \begin{equation}
   \begin{array}{ccc}
      \alpha u_1 + \beta u_2 = \hat u_1  &  &   u_1 = \frac{1}{\alpha}\hat u_1 - \frac{\beta}{\alpha}\hat u_2   \\
      u_2 = \hat u_2  &  &   u_2 = \hat u_2   \\
      \vdots        & \Longrightarrow & \vdots               \\
      u_N = \hat u_N      &  &   u_N = \hat u_N.
     \end{array}
 \end{equation}
 The original Neumann BC becomes now a \emph{inhomogeneous Dirichlet}
 BC on $\mathbf{\hat u}$:
 \begin{equation}
   \hat u_1 = c.
 \end{equation}
 The transformed linear system can be written as:
 \begin{equation}
   \mathbf{(U^T\cdot A \cdot U)\cdot  \hat u} = \mathbf{U^T\cdot  b},
 \end{equation}
 where $\mathbf{U}$ is given by
 
 \begin{equation}
   \mathbf{U} =
      \left(\begin{matrix}
         \frac{1}{\alpha} & -\frac{\beta}{\alpha} & \dots & 0 \\
         0 & 1 & \dots & 0 \\
           &   & \ddots& \vdots \\
         0 & 0 & \dots & 1
      \end{matrix}\right)
 \end{equation}
 
 Thus, all the symmetry, hermiticity or positivity properties of the
 original matrix are preserved with this matrix transformation!
 
 \section{Neumann BC as a \emph{natural} BC}
 Multiplying the 1D Sturm-Liouville equation (see section 1.1.1 of
 \cite{SOLVERS}) by spline $\Lambda_j(x)$ and integrating by parts, we
 obtain the following boundary terms:
 
 \begin{equation}
 -\Lambda_j(L) C_1(L) \phi'(L)  + \Lambda_j(0) C_1(0) \phi'(0)
 \end{equation}
 To impose $\phi'(0) = a$ and noting that $\Lambda_j(0)=\delta_{j1}$,
 you only need to add $[-aC_1(0)]$ to the first element of the
 RHS. Likewise, for the BC $\phi'(L) = b$ you only need to add
 $[bC_1(L)]$ to the last element of the RHS. No matrix manipulation (as
 for the \emph{essential} BC) is required! Notice that if $a$ or $b$ is
 zero, nothing needs to be done to impose these BC. That's the reason why
 it is called \emph{natural} BC!
 
 A subtle point to be noted here is that using \emph{natural} BC,
 $\phi'(0)$ \emph{is not} exaclty equal to $a$, althought it should
 converge to $a$ as $(\Delta x)^p$ where $p$ is the spline degree,
 while using the \emph{essential} BC, $\phi'(0)=a$ is \emph{exact}!
 
 \section{Diffusion Equation using second order time implicit method}
 Let rewrite Eq.(74) of your notes in vector form and replace the
 unkowns $n$ by $f$:
 \begin{equation}
   \mathbf{B} \frac{d \mathbf{f}}{dt} = \mathbf{M\cdot f}.
 \end{equation}
 Using a \emph{second order time centered} discretization,
 
 \begin{equation}
   \begin{split}
   \mathbf{B} \left(\frac{\mathbf{f}^{n+1}-\mathbf{f}^{n}}{\Delta t}\right) &=
   \mathbf{M} \left(\frac{\mathbf{f}^{n+1}+\mathbf{f}^{n}}{2}\right) \\
 \Rightarrow &
  \left(\mathbf{B} -\frac{\Delta t}{2}
    \mathbf{M}\right)\mathbf{f}^{n+1} =
  \left(\mathbf{B} +\frac{\Delta t}{2}
    \mathbf{M}\right)\mathbf{f}^{n}
       \end{split}
 \end{equation}
 \emph{Essential} BC has to be imposed on the matrix
 \begin{equation}
   \mathbf{B} -\frac{\Delta t}{2} \mathbf{M}
 \end{equation}
 while \emph{natural} BC is introduced while deriving the weak form
 leading to the matrix $M$.
 This method is \emph{unconditionnaly stable} and second order in
 time. When linear splines are used for the space discretization, this
 scheme is similar to the well-known \emph{Cranck-Nicolson} (see for
 example Wikipedia) discretization for parabolic PDE.
 
 \begin{thebibliography}{99}
 \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
 \bibitem{SOLVERS} {\tt The SOLVERS in BSPLINES} Reference Guide.
 \end{thebibliography}
 
 \end{document}
diff --git a/docs/manual/solvers.tex b/docs/manual/solvers.tex
index 8abdf13..5c4bbf8 100644
--- a/docs/manual/solvers.tex
+++ b/docs/manual/solvers.tex
@@ -1,2818 +1,2818 @@
 %
 % @file solvers.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{linuxdoc-sgml}
 \usepackage{graphicx}
 \usepackage{hyperref}
 \usepackage{amsmath}
 %\usepackage{verbatim}
 %\usepackage[notref]{showkeys}
 
 \title{\tt The Solvers in BSPLINES}
 \author{Trach-Minh Tran}
 \date{v0.6, December 2011}
 \abstract{Implementation of a common simple interface to popular
 solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to
 provide an easy access to these packages in order to solve elliptic and
 parabolic as well as some types of integro-differential
 equations.}
 
 \begin{document}
 \maketitle
 \tableofcontents
 
 \section{Matrix form of the problem}
 \subsection{Getting started}
 \subsubsection{A one-dimensional problem}
 
 Let us start with the one-dimensional Sturm-Liouville differential
 equation:
 
 \begin{equation*}
   -\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi  = \rho,
 \end{equation*}
 on the domain $0\leq x \leq L$ with suitable boundary conditions.
 On a grid with $N$ intervals, the discretized solution $\phi$, using
 the splines $\Lambda_i(x)$ of order $p$ can be written as
 \begin{equation}
 \label{sol1d}
   \phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x),
 \end{equation}
 where \cite{BSPLINES}
 \begin{equation*}
   d =
   \begin{cases}
     N   & \text{if $\phi$ is periodic}, \\
     N+p &  \text{otherwise},
   \end{cases}
 \end{equation*}
 and $\phi_i$ are the unknowns of the following matrix equation:
 \begin{equation}
 \label{matEq1d}
   \sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1.
 \end{equation}
 Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are
 respectively given by:
 \begin{equation}
   \begin{split}
   \label{matCoef1d}
   A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'}
            + \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\
   \rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\
   \end{split}
 \end{equation}
 For more general differential operators, the matrix coefficients $A_{ii'}$ can be
 written as a sum of contributing matrices of the form
 \begin{equation}
   \label{mat1d}
   A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'},
 \end{equation}
 where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$
 derivative of $\Lambda_{i}$.
 As the splines $\Lambda_i$ have a support of $p+1$ intervals, the
 matrix is sparse and its
 coefficients $A_{ii'}$ are non-zero only for $|i-i'| \leq p$: hence the
 matrix has a band structure of bandwidth equal to $2p+1$
 if the operator is purely differential. For an
 integral equation such as
 \begin{equation*}
   \int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x),
 \end{equation*}
 the discretization results in a \emph{dense} matrix of the form
 \begin{equation}
   \label{matIntg1d}
   A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx'
   K(x,x')\Lambda_{i'}(x').
 \end{equation}
 Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the
 matrix $A_{ii'}$  is a \emph{dyadic}:
 \begin{equation}
   A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx
   V(x)\Lambda_{i'}(x) = U_iV_{i'}.
 \end{equation}
 
 \subsubsection{Periodic boundary conditions}
 The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This
 property can be easily enforced while constructing both $\rho_i$
 and the matrix $A_{ii'}$.  This results in a solution
 $\phi_i$ which is also $N$-periodic.
 
 \subsubsection{Non-periodic boundary conditions}
 In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the
 boundaries $x=0$ and $x=L$:
 \begin{equation}
   \Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1},
 \end{equation}
 which imply that, using (\ref{sol1d})
 \begin{equation}
   \phi(0) = \phi_0, \qquad \phi(L) =  \phi_{N+p-1}.
 \end{equation}
 It is thus possible to impose the Dirichlet boundary conditions
 by a simple modification of the matrix $A_{ii'}$ as shown in
 Appendix~\ref{DirichletCond}.
 
 
 \subsection{Problems in more dimensions}
 \subsubsection{Two-dimensional equations}
 The results obtained above can be extended in a
 straightforward manner. Assuming, for example a
 \emph{polar like} $(r,\theta)$ coordinate system,
 with the discretized solution and the right-hand side
 written as:
 \begin{equation}
 \label{discreteEq2d}
   \begin{split}
     \phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
     \phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\
     \rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta)
     \Lambda_i(r) \Lambda_j(\theta),\\
   \end{split}
 \end{equation}
 where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is
 \begin{equation}
     \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}
     A_{iji'j'}\phi_{i'j'} = \rho_{ij},
 \end{equation}
 with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the
 form:
 \begin{equation}
   \label{mat2d}
   A_{iji'j'} = \int_{0}^{R}\!dr  \int_{0}^{2\pi}\!d\theta\,
   C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
   \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta).
 \end{equation}
 
 \subsubsection{Three-dimensional equations}
 Likewise, for the three-dimension case, assuming for example a
 \emph{toroidal like} $(r,\theta, \varphi)$ coordinate system,
 with the discretized solution and the right-hand side
 written as:
 \begin{equation}
 \label{discreteEq3d}
   \begin{split}
     \phi(r,\theta, \varphi) &=
     \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
     \sum_{k=0}^{N_\varphi-1}
     \phi_{ijk}\Lambda_i(r) \Lambda_j(\theta)  \Lambda_k(\varphi) \\
     \rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta
     \int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi)
     \Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\
   \end{split}
 \end{equation}
 where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is
 \begin{equation}
     \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1}
     A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk},
 \end{equation}
 with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the
 form:
 \begin{equation}
   \label{mat3d}
   A_{ijki'j'k'} = \int_{0}^{R}\!dr
   \int_{0}^{2\pi}\!d\theta\  \int_{0}^{2\pi}\!d\varphi\,
   C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
   \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta)
   \Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi).
 \end{equation}
 
 \subsubsection{Unicity condition}
 In the case of the polar coordinates $(r,\theta)$ considered above,
 the unicity condition on the axis $r=0$ should be imposed. It can be
 enforced by modifications of the matrix $A$ as described in
 Appendix~\ref{unicityCond}.
 
 \subsubsection{One-dimensional numbering}
 For two-dimensional (three-dimensional) problems, the solution
 $\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$
 ($\rho_{ijk}$) can be conveniently casted into one-dimensional
 arrays. As an example, by numbering first the last index, we obtain the
 following mappings:
 \begin{equation}
 \label{map1d}
   \mu  =
   \begin{cases}
      j + iN_\theta  & \text{two-dimensional case} \\
      k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\
   \end{cases}
 \end{equation}
 Using such a one-dimensional numbering, the matrix equation for the two and
 three dimensional cases takes a more conventional form:
 \begin{equation}
   \sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu,
 \end{equation}
 with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and
 $r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the
 matrix $A_{\mu\mu'}$ has a band structure of bandwidth
 $b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is
 important to note that, except for the one-dimensional problem, there
 are \emph{many} zeros inside the matrix band!
 
 \section{The module {\tt MATRIX}}
 \subsection{Interface}
 
 The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the
 matrix equation formulated in the previous section, using the direct
 solvers of LAPACK. The different matrix storage formats are defined,
 using the Fortran derived datatypes. The different types in the
 present version are listed in Table~\ref{matTypes}.
 
 \begin{table}[h]
   \centering
   \begin{tabular}{|l|l|}\hline
     {\tt gemat} & General dense matrix \\
     {\tt gbmat} & General band matrix \\
     {\tt pbmat} & Symmetric positive-definite band matrix \\
     {\tt periodic\_mat} & Matrix obtained for example in \\
                       & one-dimensional periodic problems\\
     \hline
   \end{tabular}
   \caption{The matrix types}
   \label{matTypes}
 \end{table}
 
 These types define {\tt DOUBLE PRECISION} real matrices.
 {\tt DOUBLE COMPLEX} matrices are declared by prefixing
 these types with the letter ``{\tt z}'', for example {\tt zgbmat}.
 Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite
 complex matrix.
 The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}.
 Note that the routine {\tt updtmat} is  mainly used for the matrix assembly
   while {\tt getxxx} and {\tt putxxx} are rather used to modify the
   matrix, for example to impose boundary conditions.
 
 \begin{table}[h]
 \centering
 \begin{tabular}{|l|l|}
   \hline
   {\tt init}      & Initializes the data structure \\
   {\tt destroy}   & Free the data structure memory \\
   \hline
   {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
   {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
                                row $i$, column $j$ \\
   {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
                                  row $i$, column $j$ \\
   {\tt vmx}       & Returns the matrix-vector product \\
   {\tt mcopy}     & Copy a matrix to another matrix \\
   {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
   {\tt determinant} & Returns the matrix determinant \\
   \hline
   {\tt factor}    & Computes the LU (Cholesky for symmetric/hermitian \\
                   & matrix) factorization \\
   {\tt bsolve}    & Solves the linear system using the factorized matrix \\
     \hline
 \end{tabular}
   \caption{The generic routines in the {\tt MATRIX} module}
   \label{matRoutines}
 \end{table}
 The complete description of each routine is given in
 Appendix~\ref{matRef}. More information on how to use it can be obtained by
 {\tt greping} its name on the examples found in the {\tt examples/} directory.
 
 \subsection{A two-dimensional example}
 \label{twodEx}
 Let's consider the Poisson equation using the cylindrical coordinates
 $(r,\theta)$:
 \begin{equation}
   -\frac{1}{r}\frac{\partial}{\partial r}
    \left[r\frac{\partial\phi}{\partial r}\right]
   -\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2}  = \rho,
   \qquad \phi(r=1,\theta) = 0.
 \end{equation}
 
 Assuming the exact solution
 \begin{equation*}
   \phi(r,\theta) = (1-r^2)r^m\cos m\theta,
 \end{equation*}
 the right-hand-side becomes
 \begin{equation*}
   \rho=4(m+1)r^{m}\cos m\theta.
 \end{equation*}
 The matrix and the right hand-side of the discretized problem are computed as
 
 \begin{equation}
   \begin{split}
   A_{iji'j'} &= \int_{0}^1\!dr  \int_{0}^{2\pi}\!d\theta\,\left[
   r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\,
   \Lambda_{j}(\theta)\Lambda_{j'}(\theta) +
    \frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\,
   \Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\
   \rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta)
     \,\,\Lambda_i(r) \Lambda_j(\theta).
   \end{split}
 \end{equation}
 In the example  {\tt pde2d.f90} this problem is treated in detail.
 In the following, only the calls to the {\tt MATRIX} routines
 are reviewed to show how the matrix problem is solved using the {\tt
   MATRIX} module.
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
 !
 !  Declare a General Band matrix
 !
   USE matrix
   USE conmat_mod
   TYPE(gbmat) :: mat
 !
 !  Rank and bandwidth. nidbas(1) is the spline order in
 !  the first dimension r.
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   nterms = 2                   ! Number of terms in the weak form
 !
 !  Initialize the matrix data structure
 !
   CALL init(kl, ku, nrank, nterms, mat)
 !
 !  Construct the matrix, using 2D spline splxy
 !  and impose boundary conditions
 !
   CALL conmat(splxy, mat, coefeq_poisson)
   CALL ibcmat(mat, ny)
 !
 !  Compute the RHS, using the 2D spline splxy
 !  and impose boundary conditions
 !
   CALL disrhs(mbess, splxy, rhs)
   CALL ibcrhs(rhs, ny)
 !
 !   Factor the matrix and solve
 !
   CALL factor(mat)
   CALL bsolve(mat, rhs, sol)
 !
 ...
 CONTAINS
   SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x, y
     INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = x
     idt(1,1) = 1; idt(1,2) = 0
     idw(1,1) = 1; idw(1,2) = 0
 !
     c(2) = 1.d0/x
     idt(2,1) = 0;  idt(2,2) = 1
     idw(2,1) = 0;  idw(2,2) = 1
   END SUBROUTINE coefeq_poisson
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 Some explanations and remarks:
 \begin{itemize}
 \item The matrix construction is performed by {\tt conmat} which will
   be described later in section \ref{secCONMAT}. The
   \emph{weak form} is defined in the \emph{internal}
   procedure {\tt coefeq\_poisson} and passed as an argument to
    {\tt  conmat}. See section \ref{secCONMAT} for a detailed
    description of the variables {\tt c, idt, idw} returned by
    {\tt coefeq\_poisson}.
 \item Boundary conditions are imposed by modifications
   of the matrix in subroutine {\tt ibcmat} (see file {\tt the
   ibcmat.f90}), using the {\tt MATRIX}
   routines {\tt getrow, putrow, getcol, putcol}.
 \item The construction of the right-hand-side in {\tt disrhs} (see the
   file {\tt disrhs.f90}) is computed using a Gauss quadrature..
 \item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in
   this example is symmetric and positive-definite!) requires only a few
   modifications of the program (see the complete example {\tt
     pde2d\_pb.f90}):
   \begin{itemize}
   \item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations
   \item Change the list of arguments in the routine {\tt init} to
     ({\tt ku, nrank, nterms, mat})
   \item Small changes in the boundary conditions ({\tt ibcmat} and {\tt
     ibcrhs}) to take into account the symmetry.
   \end{itemize}
 \item The module {\tt MATRIX} can be used independently of {\tt
   BSPLINES} (which is used here only to compute the matrix and
   right-hand-side), for example in a problem discretized using
   Finite Differences.
 \end{itemize}
 
 \section{Sparse matrix storage}
 Using the \emph{band matrix format} for a pure differential operator
 requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the
 two-dimensional problem as shown in section 1, while there are only
 $(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional
 problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for
 $(2p_r+1)^3$ non-zero elements.
 
 In order to reduce the matrix storage, a solution consists of just
 storing the matrix non-zero elements and use the \emph{Sparse Direct
 Solvers}. With an optimal \emph{renumbering} strategy (or
 \emph{fill-in reducing ordering}), the
 size of the factored matrix can be expected to be smaller than the
 corresponding band matrix.
 
 An alternative is to use \emph{iterative} methods which usually need
 less memory.
 
 Such a sparse matrix is implemented in the {\tt SPARSE} module where
 each matrix row is represented by a \emph{\tt linked} list of elements
 with sorted column index. The data structure of this sparse matrix is
 wrapped up in a the Fortran data type {\tt spmat} for a real matrix and
 {\tt zspmat} for a complex matrix. Most of the generic routines which
 are already defined in the {\tt MATRIX} module are overloaded for these
 matrix types. They are listed in Table~\ref{spmatRoutines}. The complete
 documentation of these routines can be found in Appendix~\ref{spmatRef}.
 
 \begin{table}[h]
 \centering
 \begin{tabular}{|l|l|}
   \hline
   \emph{Matrix types} & {\tt spmat, zspmat} \\
   \hline\hline
   {\tt init}      & Initializes the data structure \\
   {\tt destroy}   & Free the data structure memory \\
   \hline
   {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
   {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
                                row $i$, column $j$ \\
   {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
                                  row $i$, column $j$ \\
   \hline
   {\tt get\_count}   & Get the number of non-zero elements in matrix \\
   \hline
 \end{tabular}
   \caption{The generic routines in the {\tt SPARSE} module}
   \label{spmatRoutines}
 \end{table}
 
 It should be noted that this module is \emph{not} used
 directly in solver problems. One usually uses instead modules which
 are specific to a type of (direct or iterative) solver. As will be
 shown in the next section, it is the routines in this solver module which
 directly calls the routines defined in the {\tt SPARSE} module during the matrix
 assembly.
 
 \section{Solvers using the module {\tt SPARSE}}
 All the solvers discussed in this section use initially the module
 {\tt SPARSE} to construct the sparse matrix. Once this construction
 procedure is complete, this matrix is converted to the (usually more
 efficient) format used by the solver.
 In a time-dependent simulation where the problem matrix
 changes but not the sparsity pattern, the subsequent matrix assembly
 will be performed directly on this solver's format.
 
 Thus for example, the first time {\tt updtmat} is called
 on a new matrix, it is the version from {\tt SPARSE}. Next, if
 {\tt  updtmat} is called again to modify the matrix, it will be the
 solver's version, unless the matrix is re-initialized by a call to
 {\tt destroy} followed by {\tt init}. This switch is completely
 transparent for the user as shown through an example in the next
 section.
 
 \subsection{The PARDISO direct solver}
 The interface to PARDISO~\cite{PARDISO} is implemented in the
 module {\tt PARDISO\_BSPLINES}.
 The matrix type (symmetric, hermitian, positive-definite) is set
 by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
 generic routine {\tt init}. All the other generic routines defined in
 the module {MATRIX}, plus routines specific to the sparse solver,
 are listed in Table~\ref{pardisoRoutines}. The complete documentation
 of these routines is given in Appendix~\ref{pardisoRef}.
 
 
 \begin{table}[h]
 \centering
 \begin{tabular}{|l|l|}
   \hline
   \emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\
   \hline\hline
   {\tt init}      & Initializes the data structure \\
   {\tt destroy}   & Free the data structure memory \\
   \hline
   {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
   {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
                                row $i$, column $j$ \\
   {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
                                  row $i$, column $j$ \\
   {\tt vmx}       & Returns the matrix-vector product \\
   {\tt mcopy}     & Copy a matrix to another matrix \\
   {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
   {\tt clear\_mat}& Set the matrix elements to zero.\\
   {\tt psum\_mat} & Parallel sum of matrices \\
   {\tt p2p\_mat}  & Point-to-point combine sparse matrix between 2 processes\\
   {\tt get\_count}& Get the number of non-zero elements in matrix \\
   \hline
   {\tt factor}    & Factorization \\
   {\tt bsolve}    & Solves the linear system using the factorized matrix \\
   {\tt to\_mat}   & Convert to PARDISO CSR matrix format \\
   {\tt reord\_mat}& Reordering and symbolic factorization \\
   {\tt numfact}   & Numerical factorization \\
     \hline
 \end{tabular}
   \caption{The generic routines in the {\tt PARDISO\_BSPLINES} module}
   \label{pardisoRoutines}
 \end{table}
 
 Below, a complete example solving a simple
 two-dimensional Poisson discretized by the 5 point Finite Difference
 method illustrates how to use the {\tt PARDISO\_BSPLINES} module.
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
 PROGRAM main
   USE pardiso_bsplines
   IMPLICIT NONE
   TYPE(pardiso_mat) :: amat
   DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
   INTEGER :: nx=5, ny=4
   INTEGER :: n, nnz
   INTEGER :: i, j, irow, jcol
 !
   WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
   READ(*,*) nx, ny
   n = nx*ny  ! Rank of the matrix
   ALLOCATE(rhs(n))
   ALLOCATE(sol(n))
   ALLOCATE(arow(n))
 !
   CALL init(n, 1, amat, nlsym=.TRUE.)  ! Pardiso mat, symmetric case
 !
 !  Construct the matrix and RHS
 !
   DO j=1,ny
      DO i=1,nx
         arow = 0.0d0
         irow = numb(i,j)
         arow(irow) = 4.0d0
         IF(i.GT.1)  arow(numb(i-1,j)) = -1.0d0
         IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
         IF(j.GT.1)  arow(numb(i,j-1)) = -1.0d0
         IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
         CALL putrow(amat, irow, arow)
         rhs(irow) = SUM(arow)   ! => the exact solution is 1
      END DO
   END DO
 !
   WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
 !
 !  Factor the amat matrix (Reordering, symbolic and numerical factorization)
 !
   CALL factor(amat)
 !
 !  Back solve
 !
   CALL bsolve(amat, rhs, sol)
 !
 !   Check solutions
 !
   WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
 !
 !   Clean up
 !
   DEALLOCATE(rhs)
   DEALLOCATE(sol)
   DEALLOCATE(arow)
   CALL destroy(amat)
 CONTAINS
   INTEGER FUNCTION numb(i,j)
 !
 !  One-dimensional numbering
 !  Number first x then y
 !
     INTEGER, INTENT(in) :: i, j
     numb = (j-1)*nx + i
   END FUNCTION numb
 END PROGRAM main
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 It should be noted that
 \begin{itemize}
 \item The routine {\tt putrow} in the matrix construction loop uses the
   version from the {\tt SPARSE} module to create dynamically the
   matrix row using the linked list.
 \item The routine {\tt factor} calls successively the matrix conversion
   {\tt to\_mat}, the reordering and symbolic factorization routine
   {\tt reord\_mat} and finally the numerical factorization {\tt
     numfact}. One could indeed call these three routines separately
   instead of the single call to {\tt factor},
 \item After solving the linear system, if the matrix is modified by
   calling for example {\tt putrow} again, it will modify
   directly the converted matrix and not on the {\tt spmat} matrix
   which is anyway \emph{destroyed} at the end of {\tt to\_mat}.
 \item If the matrix sparsity changes, the matrix should be
   re-initialized by calling the {\tt destroy} and {\tt init} routines.
 \end{itemize}
 
 Other examples can be found by running ``{\tt grep pardiso\_mat}''
 on the F90 files in the directory {\tt examples/}.
 
 \subsection{The WSMP direct solver}
 The interface to WSMP~\cite{WSMP} is implemented in the
 module {\tt WSMP\_BSPLINES}.
 The matrix type (symmetric, hermitian, positive-definite) is set
 by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
 generic routine {\tt init}. All the other generic routines defined in
 the module {MATRIX}, plus routines specific to the sparse solver,
 are listed in Table~\ref{wsmpRoutines}. The complete documentation
 of these routines is given in Appendix~\ref{wsmpRef}.
 
 \begin{table}[h]
 \centering
 \begin{tabular}{|l|l|}
   \hline
   \emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\
   \hline\hline
   {\tt init}      & Initializes the data structure \\
   {\tt destroy}   & Free the data structure memory \\
   \hline
   {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
   {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
                                row $i$, column $j$ \\
   {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
                                  row $i$, column $j$ \\
   {\tt vmx}       & Returns the matrix-vector product \\
   {\tt mcopy}     & Copy a matrix to another matrix \\
   {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
   {\tt clear\_mat}& Set the matrix elements to zero.\\
   {\tt psum\_mat} & Parallel sum of matrices \\
   {\tt p2p\_mat}  & Point-to-point combine sparse matrix between 2 processes\\
   {\tt get\_count}& Get the number of non-zero elements in matrix \\
   \hline
   {\tt factor}    & Factorization \\
   {\tt bsolve}    & Solves the linear system using the factorized matrix \\
   {\tt to\_mat}   & Convert to WSMP CSR matrix format \\
   {\tt reord\_mat}& Reordering and symbolic factorization \\
   {\tt numfact}   & Numerical factorization \\
     \hline
 \end{tabular}
   \caption{The generic routines in the {\tt WSMP\_BSPLINES} module}
   \label{wsmpRoutines}
 \end{table}
 
 The simple Poisson example using the {\tt PARDISO\_BSPLINES} module
 shown in the previous section can be easily adapted to the WSMP
 interface since there are only two lines to change: the {\tt USE} and
 the matrix {\tt TYPE} lines.
 
 Other examples of how to use this interface can be found by running
 ``{\tt grep wsmp\_mat}'' on  the F90 files the directory {\tt examples/}.
 
 The same solver functionality can be found in both the PARDISO and
 WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines}
 and Table~\ref{wsmpRoutines} or the description of routines in
 Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}.
 However, there
 is an important difference. While in PARDISO (and indeed also in LAPACK),
 it is possible to define several matrices to solve simultaneously, it
 appears that in WSMP, this is possible \emph{only} for symmetric and
 hermitian matrices: in the present 10.9 version, the
 routines to store and recall the solver context
 {\tt  WSTOREMAT/WRECALLMAT} which are present in the symmetric version of
 the library are missing in the general version!
 
 A separate module named {\tt PWSMP\_BSPLINES} added the MPI
 \emph{parallelization} capability provided by WSMP. This parallel version
 implements the same user interface as shown in Table~\ref{wsmpRoutines}.
 The following considerations should be however taken in to account:
 \begin{enumerate}
 \item The coefficient matrix {\tt amat} is partitioned into blocks of
 \emph{contiguous} rows, with their indices defined in
 the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to
 {\tt init}.
 \item Calls to the routine {\tt updtmat} to update the matrix coefficients
 should not specify a row index \emph{outside} this interval.
 On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will
 ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}]
 is passed to them.
 \item An \emph{optional} MPI communicator can be given to {\tt init}
 using the keyword {\tt comm\_in}. By default, the communicator
 {\tt MPI\_COMM\_WORLD} is used.
 \end{enumerate}
 
 A complete example using {\tt PWSMP\_BSPLINES} can be found in
 {\tt examples/pde2d\_pwsmp.f90}.
 
 \subsection{The MUMPS direct solver}
 {\tt MUMPS}~\cite{MUMPS}  is a \emph{parallel sparse direct solver}
 using {\tt MPI} and is implemented in the module
 {\tt MUMPS\_BSPLINES}. User program using this module
 should be compiled and linked with {\tt MPI} even if only the
 serial version of the solver is needed, in which case the
 {\tt MPI\_COMM\_SELF} is passed to the initialization routine
 {\tt init} as an \emph{optional} argument with the keyword
 {\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be
 passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it
 is possible to use both serial and parallel solvers in the same
 program to solve different matrix problems.
 
 As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the
 {\tt MUMPS} solver is used and summarized in
 Table~\ref{mumpsRoutines}. The complete documentation
 of these routines is given in Appendix~\ref{mumpsRef}.
 
 \begin{table}[h]
 \centering
 \begin{tabular}{|l|l|}
   \hline
   \emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\
   \hline\hline
   {\tt init}      & Initializes the data structure \\
   {\tt destroy}   & Free the data structure memory \\
   \hline
   {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
   {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
                                row $i$, column $j$ \\
   {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
                                  row $i$, column $j$ \\
   {\tt vmx}       & Returns the matrix-vector product \\
   {\tt mcopy}     & Copy a matrix to another matrix \\
   {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
   {\tt clear\_mat}& Set the matrix elements to zero.\\
   {\tt psum\_mat} & Parallel sum of matrices \\
   {\tt p2p\_mat}  & Point-to-point combine sparse matrix between 2 processes\\
   {\tt get\_count}& Get the number of non-zero elements in matrix \\
   \hline
   {\tt factor}    & Factorization \\
   {\tt bsolve}    & Solves the linear system using the factorized matrix \\
   {\tt to\_mat}   & Convert to WSMP CSR matrix format \\
   {\tt reord\_mat}& Reordering and symbolic factorization \\
   {\tt numfact}   & Numerical factorization \\
     \hline
 \end{tabular}
   \caption{The generic routines in the {\tt MUMPS\_BSPLINES} module}
   \label{mumpsRoutines}
 \end{table}
 
 
 \section{Fourier solver \cite{McMillan}}
 
 \subsection{The matrix equation in Fourier space}
 For a periodic one-dimensional problem, the solution $\phi_i$ and
 the right-hand-side $\rho_i$ in (\ref{matEq1d}) are
 $N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as
 \begin{equation}
   \begin{split}
     \hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj},  &\qquad
     \hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj},  \\
     \phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj},  &\qquad
     \rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}.
   \end{split}
 \end{equation}
 Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation
 in Fourier space:
 \begin{equation}
 \label{Fourier1d}
   \sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k},
 \end{equation}
 where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the
 notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh
 with the interval $\Delta=L/N$, each of the DFT matrices of the
 weak form can be written as
 \begin{equation}
   \begin{split}
   \hat{A}_{kk'}
    &= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj}
                  \sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\
    &= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \,
          \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
          \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
          \Lambda_{j'}^{\alpha'} (x) \\
    &= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
          \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
          \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
          \Lambda_{j'}^{\alpha'} (x)
   \end{split}
 \end{equation}
 Note that each of the last two sums is over the splines which are non-zero
 at a given $x$. Using the translational symmetry of the periodic splines:
 \begin{equation*}
    \sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) =
     \sum_j e^{i\frac{2\pi}{N}kj}
     \Lambda_{j-J}^\alpha(x-J\Delta) =
     e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta),
 \end{equation*}
 where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as
 \begin{equation}
   \hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj},
 \end{equation}
 which are computed by the routine {\tt ft\_basfun} in the module
 {\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le
 p$. The DFT matrices can now be written as:
 \begin{equation*}
   \hat{A}_{kk'} =
   \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
    e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\,
    \left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}.
 \end{equation*}
 
 Finally, making the variable transform $x\rightarrow x+J\Delta$ and
 defining the DFT of the weak-form coefficient $C$ as
 \begin{equation}
   \hat{C}_{k}(x) =  \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk},
 \end{equation}
 the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an
 integration over the first interval:
 \begin{equation}
   \hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\,
                    \hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x)
     \left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*},
 \end{equation}
 which can be computed using again the same Gauss formula as before.
 For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation
 (\ref{Fourier1d}) reduces to a system of equations for the
 uncoupled Fourier modes.
 
 When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}.
 However in problems where the solution is expected to be
 ``smooth'', one can keep only a small number of Fourier modes, reducing
 thus the rank of  $\hat A_{kk'}$. Furthermore, if the coefficients
 $C(x)$ of the differential equations are very smooth, peaked at a few (low
 order) modes, the DFT matrix can become \emph{sparse}!
 
 The generalization to the two-dimensional problem (\ref{mat2d}) is
 straightforward:
 
 \begin{gather}
   \hat{A}_{im,i'm'} =
   \frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \,
     \hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta)
     \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\}
     \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\
     \hat{C}_{m}(r,\theta) =
     \sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}.
 \end{gather}
 
 Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain
 
 \begin{gather}
   \hat{A}_{imn,i'm'n'} =
   \frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta
    \int_0^{\Delta\varphi}\!\!d\varphi \,
     \hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta)
     \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi)
     \left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\}
     \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\
     \hat{C}_{mn}(r,\theta,\varphi) =
     \sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+
     k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}.
 \end{gather}
 
 Note that for axi-symmetric systems where the coefficients $C$ do not
 depend on $\varphi$
 \begin{equation}
   \hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0}
 \end{equation}
 and thus the three-dimensional problem reduces to a set of independent
 two-dimensional problems with
 \begin{equation}
   \begin{split}
     \hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\
         M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left|\hat{\Lambda}_n(\varphi)\right|^2.
   \end{split}
 \end{equation}
 
 \subsection{Integral equation}
 The DFT matrices for differential operators derived above can be
 extended to an integral operator of the following form:
 \begin{equation}
   \int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x),
 \end{equation}
 where $\phi(x)$ is $L$-periodic. Using the same FE discretization as
 above results in the following matrix in \emph{real} space:
 \begin{equation}
   A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\,
   K(x,x')\,\Lambda_{j'}(x'),
 \end{equation}
 and its DFT
 \begin{equation}
   \hat{A}_{kk'} = \frac{1}{N}
   \sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\,
   \sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\,
     e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\,
     e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*},
 \end{equation}
 Now, defining the DFT of the kernel as
 \begin{equation}
   \hat{K}_{kk'}(x,x') =
   \sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\,
   e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'},
 \end{equation}
 the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to
 \begin{equation}
    \hat{A}_{kk'} = \frac{1}{N}
    \int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\,
      \hat{\Lambda}_k (x)\,
      \left[\hat{\Lambda}_{k'}(x')\right]^{*}.
 \end{equation}
 Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse}
 if only a limited number of Fourier modes are retained in the DFT of
 the kernel $\hat{K}$.
 
 \subsection{A two-dimensional example with a non-uniform coefficient}
 As a check, we considered here a two-dimensional example similar to the example in
 section \ref{twodEx} but with a non-uniform coefficient:
 
 \begin{equation}
   -\frac{1}{r}\frac{\partial}{\partial r}
    \left[rC\frac{\partial\phi}{\partial r}\right]
   -\frac{1}{r^2}\frac{\partial}{\partial \theta}
    \left[C\frac{\partial\phi}{\partial \theta}\right]  = \rho.
 \end{equation}
 
 With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in
 section (\ref{twodEx})
 \begin{equation}
   \phi(r,\theta) = (1-r^2)r^m\cos m\theta,
 \end{equation}
 the right-hand side becomes
 \begin{equation}
   \begin{split}
     \rho(r,\theta) =  4(m+1)r^m\cos m\theta & +
       \frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\
       &+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta.
   \end{split}
 \end{equation}
 
 This problem is solved in real space and Fourier space respectively in example
 {\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}.
 Both use  the {\tt PARDISO\_BSPLINES} module to solve the sparse
 matrix equation. It should be noted that the Fourier method should yield the
 \emph{same solution} as found with the solver in real space if \emph{all} the
 $N_\theta$ Fourier modes are kept.
 
 For the problem defined above with {\tt m=3}, by keeping only the seven Fourier
 modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the
 Fourier solver, we found that both methods yield the same (up to 5
 digits) \emph{relative discretization error}. Furthermore, increasing
 the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$
 Fourier components of the right hand side $\rho$ are not null) does
 not increase the accuracy of the computed solution!
 
 In this example, the matrix in Fourier space has a rank which is
 $N_\theta/7$ times smaller than in the solver in real space. The number of
 non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3
 Fourier mode coupling terms are considered.
 
 In general, the efficiency of such a \emph{matrix filter} is expected
 to be problem-dependent. The Fourier solver should be tested in real simulations.
 
 \section{The matrix construction module {\tt CONMAT\_MOD}}
 \label{secCONMAT}
 The module implements the generic matrix construction subroutine
 {\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly},
 for 1D and 2D differential equations. The computed matrix is returned in the
 argument {\tt mat} which can be a Lapack band matrix as well as a
 PARDISO, WSMP or MUMPS sparse matrix.
 The complete interface of the subroutine is given below.
 
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE conmat(spl, mat, coefeq, maxder)
     TYPE(spline1d|spline2d) :: spl
     TYPE([z]gbmat|[z]pbmat|[z]periodic_mat|[z]pardiso|...) :: mat
     INTEGER, INTENT(in), OPTIONAL :: maxder[(2)]
     INTERFACE
      SUBROUTINE coefeq(x, [y], idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x, [y]
        INTEGER, INTENT(out) :: idt(:), idw(:)
        DOUBLE PRECISION, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
     END INTERFACE
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Construct the FE matrix for 1D or 2D differential operator.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    spl    : 1D or 2D spline
    mat    : matrix object
    coefeq : user provided subroutine (see below)
    maxder : Maximum order of the derivatives in the weak form.
             Equal to 1 (first derivative) by default.
 \end{verbatim}
 \end{description}
 
 The subroutine {\tt conmat} includes, in addition to the arguments
 {\tt spl, mat} and {\tt maxder} described above,
 an user provided subroutine as the third
 argument {\tt coefeq} which computes all the weak
 form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d})
 for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output
 array {\tt c} will contain all the computed $C$ with its corresponding
 derivative orders $(d,d')$ returned in {\tt idt, idw} respectively.
 Other quantities required to calculate the coefficients $C$ could be
 communicated to {\tt coefeq}, using for example a {\tt COMMON} block or
 a {\tt MODULE}.
 
 An example of using this module can be found in section \ref{twodEx}.
 
 \appendix
 
 \section{Matrix assembly for differential operators}
 \label{matAssembly}
 \subsection{1D case}
 \subsubsection{Local matrix}
 The contribution to the discretized weak-form from the interval
 $[x_i,  x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices}
 \begin{equation}
 \label{locMat1d}
   \begin{split}
   A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x)
       \Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\
   &\simeq \sum_{g=1}^{G}\,
       \underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}}
      \,\underbrace{C(x_g)}_{c_g},
   \end{split}
 \end{equation}
 where a $G$ point Gauss quadrature over the interval $[x_i,  x_{i+1}]$
 is used to approximate the integral and
 $\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines
 which are non zero in the interval $[x_i,  x_{i+1}]$. For splines of
 degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a
 \emph{matrix-vector product}:
 \begin{equation}
   \mathbf{A}= \mathbf{F} \cdot \mathbf{c}.
 \end{equation}
 
 \subsubsection{Mapping to global matrix}
 For $N$ intervals, the number of spline elements of degree $p$, is
 $N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}.
 Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added
 to the \emph{global} matrix using the mapping:
 \begin{equation}
   \begin{split}
     A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\
     I = i+\alpha, & \qquad I' = i+\alpha'
   \end{split}
 \end{equation}
 For periodic problems, the indices $I,I'$ are further transformed
 by taking into account the periodicity $N$, using for example the
 following {\tt FORTRAN} statement
 
 \begin{center} \tt
   I = MODULO(I-1,N) + 1 \\
 \end{center}
 
 \subsection{2D case}
 \subsubsection{Local matrix}
 In this case, the local matrix obtained for the grid cell
 $[x_i,  x_{i+1}]\times[y_j,  y_{j+1}]$ takes the form:
 \begin{equation}
 \label{locMat2d}
   \begin{split}
   A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx
   \int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y)
       \,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\
   &\simeq \sum_{g_1=1}^{G_1}\,
       \underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}}
      \;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}}
      \underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}},
   \end{split}
 \end{equation}
 which can be computed efficiently as \emph{matrix-matrix products}
 \begin{equation}
   \mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}}
 \end{equation}
 \subsubsection{Mapping to global matrix}
 The local to global element indices mapping on each of the two
 dimensions can be defined as previously as
 \begin{equation}
   \begin{split}
     I = i+\alpha, & \qquad I' = i+\alpha' \\
     J = j+\beta, & \qquad J' = j+\beta'
   \end{split}
 \end{equation}
 If any of the 2 dimensions is periodic, the periodic condition have
 to be applied to the corresponding global element index as explained
 above.
 
 Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$
 to the standard 2 dimension matrix, we number first the elements in
 $y$ coordinate and obtain the following index transformation:
 \begin{equation}
   \mu = J + N^y_e(I-1), \qquad \mu' = J' +  N^y_e(I'-1),
 \end{equation}
 where $N^y_e$ is the number of elements along the $y$ coordinate. The
 \emph{global} matrix is then constructed from
 \begin{equation}
   A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'}
 \end{equation}
 
 \section{The boundary conditions}
 \subsection{Dirichlet condition}
 \label{DirichletCond}
 \subsubsection{1D case}
 Let us consider the boundary condition
 $u(0)=c$. Since all the splines are 0
 at $x=0$, except for the first spline which is equal to 1,
 $\Lambda_i(0)=\delta_{i,1}$, we have simply
 \begin{equation}
   c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow
   \quad u_{1} = c.
 \end{equation}
 The discretized linear system of equations, taking into account of
 this BC, can thus be written as
 \begin{equation}
   \begin{split}
     u_1 &= c\\
     \sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N
   \end{split}
 \end{equation}
 or in the following  matrix form:
 \begin{equation}
      \left(\begin{matrix}
        1 & 0 & \cdots \\
        0 & A_{22} & \cdots \\
        \vdots & \vdots & \ddots \\
      \end{matrix}\right)
      \left(\begin{matrix}
        u_{1} \\
        u_{2} \\
        \vdots\\
        u_{N} \\
     \end{matrix}\right) =
      \left(\begin{matrix}
        c \\
        f_{2} -cA_{21}\\
        \vdots\\
        f_{N} -cA_{N1}\\
      \end{matrix}\right)
 \end{equation}
 Note that (1) the transformed matrix preserves any symmetry or
 positivity of the original matrix, (2) the first column of the
 original matrix has to be saved in order to modify the RHS $f_i$
 but only for non zero $c$ and (3) in that case, one needs to save
 only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order.
 
 In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be
 summarized as follows:
 \begin{enumerate}
 \item Matrix transformation
   \begin{enumerate}
   \item Clear the matrix row $i=1$ and set its diagonal term
     $A_{11}$ to 1.
   \item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it.
   \item Clear the matrix column $j=1$ and set its diagonal term
     $A_{11}$ to 1.
   \end{enumerate}
 \item RHS transformation
   \begin{enumerate}
   \item Set $f_1\leftarrow c$.
   \item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$.
   \end{enumerate}
 \end{enumerate}
 If the original matrix \emph{is not symmetric}, only the steps (1a)
 and (2a) are required, since the other steps are only necessary to
 preserve the symmetry of the original matrix.
 
 \subsubsection{2D case}
 In that case, let us write the solution $u(x,y)$ as
 \begin{equation}
   u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y),
 \end{equation}
 where $N_1, N_2$ are the number of elements in each
 dimension. Assuming the BC $u(0,y) = g(y)$, and since
 $\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$
 should satisfy
 \begin{equation}
 \label{dirich_2d}
   \sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y).
 \end{equation}
 If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution
 $u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}.
 For non-uniform $g$,
 at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$
 satisfying the equation above:
 \begin{enumerate}
 \item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable}
   set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an
   \emph{interpolation} one (see section ``Spline
   Interpolation'' in \cite{BSPLINES}).
 \item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d})
   defined as follows:
     \begin{gather}
       R = \left\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\|^2 =
       \int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2
       - 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\
       \frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[
       \sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y)
       -g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2,
     \end{gather}
     the boundary solutions $c_j$ can be calculated by solving the following
     \emph{weak-form} of Eq.(\ref{dirich_2d}):
     \begin{equation}
       \sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) =
       \int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2.
     \end{equation}
 \end{enumerate}
 Once the values of $c_j$ known, the procedure described for the 1D case
 above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$.
 
 A full example for solving the cylindrical Laplace equation in
 cylindrical coordinates:
 \begin{equation}
   \begin{split}
   \frac{1}{r}\frac{\partial}{\partial r}
   \left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2}
   \frac{\partial^2\phi}{\partial \theta^2} = 0 \\
     \phi(r=1,\theta) &= \cos m\theta.
   \end{split}
 \end{equation}
 is given in {\tt bpslines/examples/dirichlet/poisson.f90}.
 
 \subsection{Unicity on the axis}
 \label{unicityCond}
 Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$  , and
 their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by
 
 \begin{equation} \begin{array}{ccc}
      u_1-u_N = \hat u_1  &  &   u_1 = \hat u_1 + \hat u_N   \\
      u_2-u_N = \hat u_2  &  &   u_2 = \hat u_2 + \hat u_N   \\
      \vdots        & \Longrightarrow & \vdots               \\
      u_{N-1}-u_N = \hat u_{N-1}  &  &   u_{N-1} = \hat u_{N-1} + \hat u_N   \\
      u_N = \hat u_N      &  &   u_N = \hat u_N,
     \end{array}  \label{eq:unicity1} \end{equation}
 the unicity condition can be specified by simply imposing
 
 \begin{equation}
   \hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
 \end{equation}
 From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined
 as
 
 \begin{equation}
   \mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
      \left(\begin{matrix}
         1 & 0 & \dots & 0 & 1 \\
         0 & 1 & \dots & 0 & 1 \\
           &   & \ddots&   & \vdots \\
         0 & 0 & \dots & 1 & 1 \\
         0 & 0 & \dots & 0 & 1
      \end{matrix}\right), \quad \mathbf{U^{T}} =
      \left(\begin{matrix}
         1 & 0 & \dots & 0 & 0 \\
         0 & 1 & \dots & 0 & 0 \\
           &   & \ddots&   & \vdots \\
         0 & 0 & \dots & 1 & 0 \\
         1 & 1 & \dots & 1 & 1
      \end{matrix}\right).
 \end{equation}
 
 
 \paragraph{Matrix product \( \mathbf{A\cdot U}\)}
 \begin{equation}
 \mathbf{ A\cdot U} =
      \left(\begin{array}{lllll}
         A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  \sum_{j} A_{1,j}  \\
         A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  \sum_{j} A_{2,j}  \\
           &   & \ddots&   & \vdots \\
         A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} &  \sum_{j}A_{N-1,j}  \\
         A_{N,1} & A_{N,2}  & \dots & A_{N,N-1} &  \sum_{j}A_{N,j}
      \end{array}\right).
 \end{equation}
 Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the
 \emph{the sum of each row on the last column}.
 
 \paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)}
 \begin{equation}
 \mathbf{ U^T \cdot A} =
      \left(\begin{array}{lllll}
         A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  A_{1,N}  \\
         A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  A_{2,N}  \\
           &   & \ddots&   & \vdots \\
         A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} & A_{N-1,N}  \\
         \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
         \sum_{i}A_{i,N}
      \end{array}\right).
 \end{equation}
 Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the
 \emph{the sum of each column  on the last row}.
 
 \paragraph{Product \( \mathbf{\hat U \cdot b}\)}
 \begin{equation}
 \mathbf{\hat b} = \mathbf{U^T\cdot b} =
      \left(\begin{array}{l}
         b_1 \\
         b_2 \\
         \vdots \\
         b_{N-1} \\
         \sum_{i} b_{i}
      \end{array}\right),
 \end{equation}
 
 \paragraph{Transformation of the original matrix equation}
 The full original linear system, obtained from the discretization of the
 2D \(r,\theta\) polar coordinates can be written as:
 
 \begin{equation}
      \left(\begin{array}{ll}
        \mathbf{A} & \mathbf{B} \\
        \mathbf{C} & \mathbf{D}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{u} \\
        \mathbf{v}
      \end{array}\right) =
      \left(\begin{array}{l}
        \mathbf{b} \\
        \mathbf{c}
      \end{array}\right), \label{eq:orig_matrix_eq}
 \end{equation}
 where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and
 the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can
 thus be written as
 
 \begin{equation*}
      \left(\begin{array}{ll}
        \mathbf{U^T} & 0 \\
        0            & \mathbf{I}
      \end{array}\right)
      \left(\begin{array}{ll}
        \mathbf{A} & \mathbf{B} \\
        \mathbf{C} & \mathbf{D}
      \end{array}\right)
      \left(\begin{array}{ll}
        \mathbf{U} & 0 \\
        0            & \mathbf{I}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{\hat u} \\
        \mathbf{v}
      \end{array}\right) =
      \left(\begin{array}{ll}
        \mathbf{U^T} &0  \\
        0            & \mathbf{I}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{b} \\
        \mathbf{c}
      \end{array}\right),
 \end{equation*}
 
 \begin{equation}
    \Longrightarrow
      \left(\begin{array}{cc}
        \mathbf{U^TAU} & \mathbf{U^TB} \\
        \mathbf{CU} & \mathbf{D}
      \end{array}\right)
      \left(\begin{array}{l}
        \mathbf{\hat u} \\
        \mathbf{v}
      \end{array}\right) =
      \left(\begin{array}{c}
        \mathbf{U^Tb} \\
        \mathbf{c}
      \end{array}\right),
 \end{equation}
 Notice that the transformation preserves any symmetry existing in the original system
 (\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
 only the modified elements are shown and the sum is only over the first \(N\)
 points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements.
 
 \begin{equation}
      \left(\begin{array}{lllllll}
         \times & \times & \times & \times &  \sum_{j} A_{1,j}  & \times & \times \\
         \times & \times & \times & \times &  \sum_{j} A_{2,j}  & \times & \times \\
         \times & \times & \times & \times &  \vdots            & \times & \times \\
         \times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
          \sum_{i}A_{i,1} & \sum_{i}A_{i,2}  & \dots & \sum_{i}A_{i,N-1} &
          \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
         \times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
         \times & \times & \times & \times &  \vdots            & \times & \times
      \end{array}\right)
 \end{equation}
 Only the \(N^{th}\) column and the  \(N^{th}\) row are affected by the transformation.
 Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
 reads:
 
 \begin{equation}
      \left(\begin{array}{lllllll}
         1 & 0 & \dots & 0 & 0 & 0 & 0 \\
         0 & 1 & \dots  & 0 & 0 & 0 & 0 \\
         0 & 0 & \ddots & 0 &  \vdots            & 0 & 0 \\
         0 & 0 & \dots  & 1 & 0 & 0 & 0 \\
         0 & 0 & \dots  & 0 & \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
         0 & 0 & \dots  & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
         0 & 0 & \dots  & 0 &  \vdots            & \times & \times
      \end{array}\right)
      \left(\begin{array}{l}
         \hat u_1   \\
         \hat u_2   \\
         \vdots\\
         \hat u_{N-1}\\
         \hat u_{N} \\
         u_{N+1} \\
         \vdots
      \end{array}\right) =
      \left(\begin{array}{l}
         0   \\
         0   \\
         \vdots\\
         0   \\
         \sum_{i} b_{i} \\
         b_{N+1} \\
         \vdots
      \end{array}\right).
 \end{equation}
 
 
 \section{{\tt MATRIX} Reference}
 \label{matRef}
 
 The following conventions are adopted in the routine descriptions:
 \begin{itemize}
 \item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
   declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
 \item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
 \begin{verbatim}
   TYPE([z]gemat|[z]gbmat) :: mat
 \end{verbatim}
 declares that {\tt mat} can be either of type {\tt gemat}, {\tt
   zgemat}, {\tt pbmat} or {\tt zpbmat}.
 \item In a same declaration block, if a scalar or array of type {\tt
   DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
   which can be also complex, both should be either real
   or complex. For example in the routine {\tt updtmat}, if {\tt mat}
   of type {\tt zgbmat}, {\tt val} should be complex.
 \end{itemize}
 
 \subsection{init}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
 TYPE([z]gemat) :: mat
   SUBROUTINE init(n, nterms, mat ,kmat)
 TYPE([z]gbmat) :: mat
   SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
 TYPE([z]pbmat) :: mat
   SUBROUTINE init(ku, n, nterms, mat, kmat)
 TYPE([z]periodic_mat) :: mat
   SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
     INTEGER, INTENT(in) :: kl, ku, n, nterms
     INTEGER, OPTIONAL   :: kmat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Initialize data structure for matrix
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    n            : rank of matrix
    kl, ku       : number of sub and super diagonals
    nterms       : number of terms in weak form
    kmat         : matrix id
    mat          : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{destroy}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE destroy(mat)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Free matrix memory
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{updmat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE updtmat(mat, i, j, val)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: i, j
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Update (accumulate) element $A_{ij}$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    j           : column index
    val         : input value
 \end{verbatim}
 \end{description}
 
 \subsection{putele}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE putele(mat, i, j, val)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: i, j
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Overwrite element $A_{ij}$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    j           : column index
    val         : input value
 \end{verbatim}
 \end{description}
 
 \subsection{putrow}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE putrow(mat, i, arr)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: i
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Overwrite a matrix row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    arr         : input array
 \end{verbatim}
 \end{description}
 
 \subsection{putcol}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE putcol(mat, j, arr)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: j
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Overwrite a matrix row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    j           : column index
    arr         : input array
 \end{verbatim}
 \end{description}
 
 \subsection{getele}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE getele(mat, i, j, val)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: i, j
     DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Get element $A_{ij}$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    j           : column index
    val         : output value
 \end{verbatim}
 \end{description}
 
 \subsection{getrow}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE getrow(mat, i, arr)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: i
     DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Get a matrix row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    arr         : output array
 \end{verbatim}
 \end{description}
 
 \subsection{getcol}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE getcol(mat, j, arr)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     INTEGER, INTENT(IN) :: j
     DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Get a matrix column
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : column index
    arr         : output array
 \end{verbatim}
 \end{description}
 
 \subsection{vmx}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   FUNCTION vmx(mat, x)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     DOUBLE PRECISION|COMPLEX, DIMENSION(:), INTENT(in) :: x
     DOUBLE PRECISION|COMPLEX, DIMENSION(SIZE(x)) :: vmx
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Matrix-vector product $Ax$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    x           : input array
    vmx         : output array
 \end{verbatim}
 \end{description}
 
 \subsection{mcopy}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE mcopy(mata, matb)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Matrix copy: $B = A$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mata        : input matrix object
    matb        : output matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{maddto}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE maddto(mata, alpha, matb)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
     DOUBLE PRECISION|COMPLEX : alpha
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Matrix addition: $A  \leftarrow A+\alpha B$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mata        : input matrix object
    matb        : output matrix object
   alpha        : input scalar
 \end{verbatim}
 \end{description}
 
 \subsection{determinant}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE determinant(mat, base, pow)
     TYPE([z]gemat|[z]gbmat|[z]pbmat) :: mat
     INTEGER, INTENT(out) :: pow
     DOUBLE PRECISION|COMPLEX : base
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : input matrix object
    base        : mantissa of determinant
    pow         : exponent of determinant
 \end{verbatim}
 \end{description}
 
 \subsection{factor}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE factor(mat)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 LU (Cholesky for symmetric/hermitian matrix) factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : inout matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{bsolve}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE bsolve(mat)
     TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
     DOUBLE PRECISION|COMPLEX, DIMENSION [(:)] :: rhs
     DOUBLE PRECISION|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Solve the linear system using the factored matrix, for a single or
 multiple right-hand-side
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat  : input factored matrix object
    rhs  : input right-hand-side, overwriten by the solution if sol is not present
    sol  : contains solution
 \end{verbatim}
 \end{description}
 
 \section{{\tt SPMAT} Reference}
 \label{spmatRef}
 
 The following conventions are adopted in the routine descriptions:
 \begin{itemize}
 \item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
   declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
 \item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
 \begin{verbatim}
   TYPE([z]gemat|[z]gbmat) :: mat
 \end{verbatim}
 declares that {\tt mat} can be either of type {\tt gemat}, {\tt
   zgemat}, {\tt pbmat} or {\tt zpbmat}.
 \item In a same declaration block, if a scalar or array of type {\tt
   DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
   which can be also complex, both should be either real
   or complex. For example in the routine {\tt updtmat}, if {\tt mat}
   of type {\tt zgbmat}, {\tt val} should be complex.
 \end{itemize}
 
 \subsection{init}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE init(n, mat, istart, iend)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(in), OPTIONAL :: istart, iend
     INTEGER, INTENT(in) :: n
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Initialize an empty sparse matrix of $n$ rows.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    n            : rank of matrix
    mat          : matrix object
    istart, iend : range of row indices. By default istart=1, iend=n
 \end{verbatim}
 \end{description}
 
 \subsection{destroy}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE destroy(mat)
     TYPE([z]spmat) :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Free matrix memory
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{updmat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE updtmat(mat, i, j, val)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(IN) :: i, j
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Update (accumulate) an existing element $A_{ij}$ or insert it in the
 linked list in an increasing order in the column index j.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    j           : column index
    val         : input value
 \end{verbatim}
 \end{description}
 
 \subsection{putele}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE putele(mat, i, j, val, nlforce_zero)
     TYPE([z]pbmat) :: mat
     INTEGER, INTENT(IN) :: i, j
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Overwrite an existing element $A_{ij}$ or insert it in the
 linked list in an increasing order in the column index j.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    i            : row index
    j            : column index
    val          : input value
    nlforce_zero : Never remove an existing element when input is zero if TRUE
                   FALSE by default
 \end{verbatim}
 \end{description}
 
 \subsection{putrow}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE putrow(mat, i, arr, col, nlforce_zero)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(IN) :: i
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
     INTEGER, INTENT(in), OPTIONAL :: col(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Overwrite a matrix row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    i            : row index
    arr          : input array
    col          : input array containing column indices
    nlforce_zero : Never remove an existing element when input is zero if TRUE
                   FALSE by default
 \end{verbatim}
 \end{description}
 
 \subsection{putcol}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE putcol(mat, j, arr, nlforce_zero)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(IN) :: j
     DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Overwrite a matrix row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    j            : column index
    arr          : input array
    nlforce_zero : Never remove an existing non-zero element if .TRUE.
                   .FALSE. by default
 \end{verbatim}
 \end{description}
 
 \subsection{getele}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE getele(mat, i, j, val)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(IN) :: i, j
     DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Get element $A_{ij}$
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    j           : column index
    val         : output value
 \end{verbatim}
 \end{description}
 
 \subsection{getrow}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE getrow(mat, i, arr, col)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(IN) :: i
     DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
     INTEGER, INTENT(out), OPTIONAL :: col(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Get a matrix row and optionally the column indices
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : row index
    arr         : output array
    col         : output array containing column indices
 \end{verbatim}
 \end{description}
 
 \subsection{getcol}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE getcol(mat, j, arr)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(IN) :: j
     DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Get a matrix column
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    i           : column index
    arr         : output array
 \end{verbatim}
 \end{description}
 
 \subsection{get\_count}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   INTEGER FUNCTION get_count(mat, nnz)
     TYPE([z]spmat) :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Returns the number of non-zeros and optionally an array of numbers
 of non-zeros on each row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nnz         : array containing numbers of non-zeros on each row.
 \end{verbatim}
 \end{description}
 
 \section{{\tt PARDISO\_BSPLINES} Reference}
 \label{pardisoRef}
 The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
 getcol, vmx, mcopy, maddto} and {\tt destroy}
 have \emph{exactly} the same list of arguments as
 those from the {\tt MATRIX} module (as documented in
 Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
 that have different arguments. The same conventions as before are used
 for the routine description.
 
 \subsection{init}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
        &                      nlforce_zero)
     INTEGER, INTENT(in)           :: n, nterms
     TYPE([z]pardiso_mat) :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    n            : rank of matrix
    nterms       : number of terms in weak form
    kmat         : matrix id
    mat          : matrix object
    nlsym        : symmetric or not. Default is .FALSE.
    nlherm       : Hermitian or not for complex matrix . Default is .FALSE.
    nlpos        : Positive-definite or not. Default is .TRUE.
    nlforce_zero : Never remove an existing non-zero element if .TRUE.
                   .TRUE. by default
 \end{verbatim}
 \end{description}
 
 \subsection{clear\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE clear_mat(mat)
     TYPE([z]pardiso_mat)         :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
  Clear matrix, keeping its sparse structure unchanged
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{psum\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE sum_mat(mat, comm)
     TYPE([z]pardiso_mat)         :: mat
     INTEGER, INTENT(in)          :: comm
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
  Parallel sum of matrices. Result matrix is placed in the sparse
  matrix mat\%mat on all processes of comm.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    comm         : communicator
 \end{verbatim}
 \end{description}
 
 \subsection{p2p\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
     TYPE([z]pardiso_mat)         :: mat
     INTEGER, INTENT(in)          :: dest
     CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
     CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
     INTEGER, INTENT(in)          :: comm
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
   Point-to-point combine sparse matrix between 2 processes.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    dest         : rank of remote process
    extyp        : exchange type ('send', 'recv', 'sendrecv')
    op           : operation type ('put', 'updt')
    comm         : communicator
 \end{verbatim}
 \end{description}
 
 \subsection{get\_count}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   INTEGER FUNCTION get_count(mat, nnz)
     TYPE([z]pardiso_mat) :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Returns the number of non-zeros and optionally an array of numbers
 of non-zeros on each row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nnz         : array containing numbers of non-zeros on each row.
 \end{verbatim}
 \end{description}
 
 \subsection{factor}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE factor(mat, nlreord, nlmetis, debug)
     TYPE([z]pardiso_mat)          :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Wrapper of to\_mat, reord\_mat and numfact
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nlreord     : call reord_mat if .TRUE. (default is .TRUE.)
    nlmetis     : use METIS nested dissection for reoredering. Default
                  is minimum degree alogorithm.
    debug       : verbose output from PARDISO if .TRUE. Default is .FALSE.
 \end{verbatim}
 \end{description}
 
 \subsection{bsolve}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
     TYPE([z]pardiso_mat)               :: mat
     DOUBLE PRECISION|COMPLEX           :: rhs(:)
     DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL                  :: nref
     LOGICAL, OPTIONAL, INTENT(in)      :: debug
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Wrapper of to\_mat, reord\_mat and numfact
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat   : matrix object
    rhs   : input right-hand-side, overwriten by the solution if sol is not present
    sol   : contains solution
    ref   : maximum number of refinement steps. Default is 0 (no refinement).
    debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
 \end{verbatim}
 \end{description}
 
 \subsection{to\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE to_mat(mat)
     TYPE([z]pardiso_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Convert linked list spmat to pardiso matrix structure
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{reord\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE reord_mat(mat, nlmetis, debug)
     TYPE([z]pardiso_mat)          :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Reordering and symbolic factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nlmetis     : use METIS nested dissection for reoredering. Default
                  is minimum degree alogorithm.
    debug       : verbose output from PARDISO if .TRUE. Default is .FALSE.
 \end{verbatim}
 \end{description}
 
 \subsection{numfact}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE numfact(mat, debug)
     TYPE([z]pardiso_mat)          :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Numerical factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    debug       : verbose output from PARDISO if .TRUE. Default is .FALSE.
 \end{verbatim}
 \end{description}
 
 \section{{\tt [P]WSMP\_BSPLINES} Reference}
 \label{wsmpRef}
 The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
 getcol, vmx, mcopy, maddto} and {\tt destroy}
 have \emph{exactly} the same list of arguments as
 those from the {\tt MATRIX} module (as documented in
 Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
 that have different arguments. The same conventions as before are used
 for the routine description.
 
 \subsection{init}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
        &                      nlforce_zero, [comm_in])
     INTEGER, INTENT(in)           :: n, nterms
     TYPE([z]wsmp_mat) :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm_in
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    n            : rank of matrix
    nterms       : number of terms in weak form
    kmat         : matrix id
    mat          : matrix object
    nlsym        : symmetric or not. Default is .FALSE.
    nlherm       : Hermitian or not for complex matrix . Default is .FALSE.
    nlpos        : Positive-definite or not. Default is .TRUE.
    nlforce_zero : Never remove an existing non-zero element if .TRUE.
                   .TRUE. by default
    comm_in      : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES)
 \end{verbatim}
 \end{description}
 
 \subsection{clear\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE clear_mat(mat)
     TYPE([z]wsmp_mat) :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
  Clear matrix, keeping its sparse structure unchanged
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{psum\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE sum_mat(mat, comm)
     TYPE([z]wsmp_mat)         :: mat
     INTEGER, INTENT(in)          :: comm
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
  Parallel sum of matrices. Result matrix is placed in the sparse
  matrix mat\%mat on all processes of comm.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    comm         : communicator
 \end{verbatim}
 \end{description}
 
 \subsection{p2p\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
     TYPE([z]wsmp_mat)         :: mat
     INTEGER, INTENT(in)          :: dest
     CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
     CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
     INTEGER, INTENT(in)          :: comm
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
   Point-to-point combine sparse matrix between 2 processes.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    dest         : rank of remote process
    extyp        : exchange type ('send', 'recv', 'sendrecv')
    op           : operation type ('put', 'updt')
    comm         : communicator
 \end{verbatim}
 \end{description}
 
 \subsection{get\_count}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   INTEGER FUNCTION get_count(mat, nnz)
     TYPE([z]wsmp_mat) :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Returns the number of non-zeros and optionally an array of numbers
 of non-zeros on each row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nnz         : array containing numbers of non-zeros on each row.
 \end{verbatim}
 \end{description}
 
 \subsection{factor}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE factor(mat, nlreord)
     TYPE([z]wsmp_mat)          :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Wrapper of to\_mat, reord\_mat and numfact
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nlreord     : call reord_mat if .TRUE. (default is .TRUE.)
 \end{verbatim}
 \end{description}
 
 \subsection{bsolve}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
     TYPE([z]wsmp_mat)               :: mat
     DOUBLE PRECISION|COMPLEX           :: rhs(:)
     DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL                  :: nref
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Wrapper of to\_mat, reord\_mat and numfact
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat   : matrix object
    rhs   : input right-hand-side, overwriten by the solution if sol is not present
    sol   : contains solution
    ref   : maximum number of refinement steps. Default is 0 (no refinement).
    debug : verbose output from WSMP if .TRUE. Default is .FALSE.
 \end{verbatim}
 \end{description}
 
 \subsection{to\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE to_mat(mat)
     TYPE([z]wsmp_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Convert linked list spmat to wsmp matrix structure
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{reord\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE reord_mat(mat)
     TYPE([z]wsmp_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Reordering and symbolic factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{numfact}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE numfact(mat)
     TYPE([z]wsmp_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Numerical factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \section{{\tt MUMPS\_BSPLINES} Reference}
 \label{mumpsRef}
 The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
 getcol, vmx, mcopy, maddto} and {\tt destroy}
 have \emph{exactly} the same list of arguments as
 those from the {\tt MATRIX} module (as documented in
 Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
 that have different arguments. The same conventions as before are used
 for the routine description.
 
 \subsection{init}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
        &                      nlforce_zero, comm_in)
     INTEGER, INTENT(in)           :: n, nterms
     TYPE([z]mumps_mat) :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm_in
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    n            : rank of matrix
    nterms       : number of terms in weak form
    kmat         : matrix id
    mat          : matrix object
    nlsym        : symmetric or not. Default is .FALSE.
    nlherm       : Hermitian or not for complex matrix . Default is .FALSE.
    nlpos        : Positive-definite or not. Default is .TRUE.
    nlforce_zero : Never remove an existing non-zero element if .TRUE.
                   .TRUE. by default
    comm_in      : MPI communicator. By default MPI_COMM_SELF (serial mode).
 \end{verbatim}
 \end{description}
 
 \subsection{clear\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE clear_mat(mat)
     TYPE([z]mumps_mat) :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
  Clear matrix, keeping its sparse structure unchanged
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{psum\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE sum_mat(mat, comm)
     TYPE([z]mumps_mat)         :: mat
     INTEGER, INTENT(in)          :: comm
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
  Parallel sum of matrices. Result matrix is placed in the sparse
  matrix mat\%mat on all processes of comm.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    comm         : communicator
 \end{verbatim}
 \end{description}
 
 \subsection{p2p\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
     TYPE([z]mumps_mat)         :: mat
     INTEGER, INTENT(in)          :: dest
     CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
     CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
     INTEGER, INTENT(in)          :: comm
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
   Point-to-point combine sparse matrix between 2 processes.
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat          : matrix object
    dest         : rank of remote process
    extyp        : exchange type ('send', 'recv', 'sendrecv')
    op           : operation type ('put', 'updt')
    comm         : communicator
 \end{verbatim}
 \end{description}
 
 \subsection{get\_count}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   INTEGER FUNCTION get_count(mat, nnz)
     TYPE([z]mumps_mat) :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Returns the number of non-zeros and optionally an array of numbers
 of non-zeros on each row
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nnz         : array containing numbers of non-zeros on each row.
 \end{verbatim}
 \end{description}
 
 \subsection{factor}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE factor(mat, nlreord)
     TYPE([z]mumps_mat)          :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Wrapper of to\_mat, reord\_mat and numfact
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
    nlreord     : call reord_mat if .TRUE. (default is .TRUE.)
 \end{verbatim}
 \end{description}
 
 \subsection{bsolve}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref)
     TYPE([z]mumps_mat)               :: mat
     DOUBLE PRECISION|COMPLEX           :: rhs(:)
     DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL                  :: nref
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Wrapper of to\_mat, reord\_mat and numfact
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat   : matrix object
    rhs   : input right-hand-side, overwriten by the solution if sol is not present
    sol   : contains solution
    ref   : maximum number of refinement steps. Default is 0 (no refinement).
    debug : verbose output from MUMPS if .TRUE. Default is .FALSE.
 \end{verbatim}
 \end{description}
 
 \subsection{to\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE to_mat(mat)
     TYPE([z]mumps_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Convert linked list spmat to mumps matrix structure
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{reord\_mat}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE reord_mat(mat)
     TYPE([z]mumps_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Reordering and symbolic factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 \subsection{numfact}
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   SUBROUTINE numfact(mat)
     TYPE([z]mumps_mat)          :: mat
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 \begin{description}
 \item[Purpose:] \mbox{}
 Numerical factorization
 \item[Arguments:] \mbox{}
 \begin{verbatim}
    mat         : matrix object
 \end{verbatim}
 \end{description}
 
 
 \begin{thebibliography}{99}
 \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
 \bibitem{PARDISO} \url{http://www.pardiso-project.org/}
 \bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html}
 \bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}
 \bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space
   solution of linear partial integro-differential equations in
   toroidal magnetic confinement geometries}, Computer Physics
   Communications 181(4),
   715-719 (2010)
 \end{thebibliography}
 
 \end{document}
diff --git a/docs/manual/using_bsplines.tex b/docs/manual/using_bsplines.tex
index fec665a..974735d 100644
--- a/docs/manual/using_bsplines.tex
+++ b/docs/manual/using_bsplines.tex
@@ -1,366 +1,366 @@
 %
 % @file using_bsplines.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{linuxdoc-sgml}
 \usepackage{graphicx}
 \usepackage{hyperref}
 \usepackage{amsmath}
 %\usepackage{verbatim}
 %\usepackage[notref]{showkeys}
 
 \title{\tt Using BSPLINES in Particle Codes}
 \author{Trach-Minh Tran}
 \date{v0.1, March 2012}
 \abstract{These notes present some practical considerations on using
   BSPLINES in particle codes, in particular for the charge or current
   assignment as well as the field interpolation. Performance
   measurements are done on an Intel Xeon X5570 and the more recent
   Xeon E5-2680.}
 
 \begin{document}
 \maketitle
 %\tableofcontents
 
 \section{Introduction}
 For simplicity, we assume in these notes that we are dealing with a
 2D electrostatic particle code and the 2D Poisson equation is to be solved
 using the Finite Element Method. Starting from the \emph{weak form}
 and using the \emph{splines} for both \emph{basis} and \emph{test}
 functions, the electrostatic field potential together with its
 gradient and the right hand side can be computed from
 \begin{equation}
   \begin{split}
     \phi(x,y) &=
       \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda_j(y) \\
       \frac{\partial\phi}{\partial x} &=
       \sum_{ij}\,c_{ij}\,\Lambda'_i(x)\Lambda_j(y) \\
       \frac{\partial\phi}{\partial y} &=
       \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda'_j(y) \\
     S_{ij} &= \sum_{\mu=1}^{N_p}\, q_\mu\Lambda_i(x_\mu)\Lambda_j(y_\mu),
   \end{split}
 \end{equation}
 where $c_{ij}$ are the solutions of the discretized Poisson equation
 and $\{x_\mu,y_\mu\}$ are the coordinates of the $N_p$ simulation
 particles. At each time step, the calculation of both the field $\phi$
 and its gradient (\emph{field interpolation}) for the particle
 pusher and the construction of the RHS $S_{i}$ (\emph{charge
   assignment}) involve thus the computation of a large number of
 splines $\Lambda$ and its derivatives $\Lambda'$.
 
 Notice that the construction of the solver matrix requires also
 the calculations of the splines. This operation is however performed only
 once at the initial timestep in the (most common) case where the
 matrix is time independent and thus will not be considered in further
 these notes.
 
 \section{Computation of splines}
 Let consider the grid defined by $x_i$, $i=1,\ldots,N+1$. Inside the
 interval $[x_i, x_{i+1}]$, the $p+1$ non-zero splines of degree $p$
 can be computed efficiently using its polynomial representation given
 by
 \begin{equation}
   \begin{split}
     \Lambda_{i+\alpha}(x) &= \sum_{k=0}^{p}\, V^{i}_{k\alpha}
       (x-x_i)^k, \qquad \alpha=1,\ldots,p+1, \\
     V^i_{k\alpha} &=
     \left.\frac{1}{k!}\frac{d^k}{dx^k}\Lambda_{i+\alpha}(x)\right|_{x=x_i}.
   \end{split}
 \end{equation}
 The $(p+1)^2N$ coefficients $V^i_{k\alpha}$ are precalculated and stored
 during the spline initialization (in routine {\tt SET\_SPLINE}) by
 using the \emph{recurrence relation} \cite{BSPLINES} to compute the spline and all its
 $p$ derivatives. Note that for periodic splines on an equidistant
 mesh, only $(p+1)^2$ coefficients  $V_{k\alpha}$ are required since
 the splines have \emph{translational invariance}.
 
 For a polynomial $P(x)=a_0+a_1x+\ldots +a_px^p$, its value can be
 calculated together with it first derivative, using Horner's rule as:
 
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   f = a(p)
   fp = f
   DO i=p-1,1,-1
    f = a(i) + x*f
    fp = f + x*fp
   END DO
   f = a(0) + x*f
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 showing that exactly $4p-2$ floating operations (flops) per point are
 required. If only the value of the polynomial is needed, only $2p$ flops
 per point are required.
 
 \section{Field interpolation}
 \subsection{1D case}
 Let considered first the 1D case. The spline expansion of $\phi$ for
 $x_i\le x < x_{i+1}$ are expressed as
 \begin{equation}
   \phi(x) = \sum_{\alpha=0}^pc_{i+\alpha}\Lambda_{i+\alpha}(x).
 \end{equation}
 To calculate the field using this spline expansion, $p+1$ splines
 have to be first calculated followed by the sum above, which
 yields a total cost of $2(p+1)^2\sim 2p^2$ flops per point. This cost
 can be reduced by observing that
 $\phi(x)$ is a \emph{piecewise polynomial} (PP) of degree $p$ in
 each interval. Its PP coefficients can be obtained from
 \begin{equation}
   \begin{split}
   \phi(x) &=  \sum_{\alpha=0}^pc_{i+\alpha}\sum_{k=0}^{p}\, V^{i}_{k\alpha}
       (x-x_i)^k \\
       &= \sum_{k=0}^{p}\, \Pi^{i}_{k}(x-x_i)^k, \qquad
   \Pi^{i}_{k}= \sum_{\alpha=0}^pc_{i+\alpha} V^{i}_{k\alpha}
   \end{split}
 \end{equation}
 Once the $N(p+1)$ PP coefficients $\Pi^{i}_{k}$ have been calculated
 from the spline expansion coefficients $c_{i+\alpha}$, only $2p$ flops per
 point are required to obtain the field value, using the Horner's rule
 described previously.
 
 \subsection{2D case}
 Extension for the spline expansion and the PP representation for
 $\phi(x,y)$ is straightforwards and yields, for $x_i\le x < x_{i+1}$,
 $y_j\le y < y_{j+1}$:
 \begin{equation}
   \begin{split}
       \phi(x,y) &= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta}
       \Lambda_{i+\alpha}(x)\Lambda_{j+\beta}(y) \\
       \phi(x,y) &= \sum_{k=0}^{p1}\sum_{l=0}^{p2}\,\Pi^{ij}_{kl}(x-x_i)^k(y-y_j)^l, \qquad
   \Pi^{ij}_{kl}= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} V^{i}_{k\alpha}V^{j}_{l\beta},
   \end{split}
 \end{equation}
 where $ V^{i}_{k\alpha}$ and $V^{j}_{l\beta}$ are the PP
 coefficients of the splines $\Lambda_{i+\alpha}(x)$ and
 $\Lambda_{j+\beta}(y)$ respectively. Assuming the same spline order
 $p$ in
 both $x$ and $y$, the flop counts per point for the 2 representations are
 respectively $2(3p+2)(p+1)\sim 6p^2$ and $2p(p+2)\sim 2p^2$, while the
 storages required for the spline coefficients $c$ and the PP
 coefficients $\Pi$ are $(N+p)^2\sim N^2$ and $N^2(p+1)^2$ respectively.
 
 \subsection{Implementation in BSPLINES}
 The PP representation is selected by default in BSPLINES,
 \emph{unless} the logical keyword {\tt NLPPFORM} is set to
 {\tt .FALSE.} when calling the spline initialization routine {\tt
   SET\_SPLINE}. The flop counts per point for both methods are
 summarized in the table below
 \begin{center}
 \begin{tabular}{|l|c|c|}
   \hline
                  & 1D  & 2D \\\hline
 Spline expansion & $2(p+1)^{2}$ & $2(3p+2)(p+1)$ \\
 PP representation & $2p$      &   $2p(p+2)$ \\\hline
 \end{tabular}
 \end{center}
 The routine {\tt GRIDVAL} computes the value of the
 field or one of its derivatives. The first call to this routine
 computes the PP coefficients $\Pi$ if {\tt NLPPFORM=.TRUE.} is
 selected or just store the spline coefficients $c$ in the spline
 internal data otherwise. In the following calls to {\tt GRIDVAL}, $c$
 should not be passed to the routines.
 
 Notice that the PP representation requires to store the $N^2(p+1)^2$
 PP coefficients in the 2D case, which is still acceptable. In the 3D
 case, this storage requirement becomes $N^3(p+1)^3$  which can be
 prohibitive! In this case the less efficient \emph{Spline expansion}
 formulation should be selected.
 
 In the \emph{particle loop}, the routine {\tt GETGRAD} which computes
 the function and all its first partial derivatives at once should be
 called instead of {\tt GRIDVAL}.
 
 \section{Particle localization({\tt locintv})}
 In both charge assignment and field interpolation, finding in which
 interval of the spatial grid the particle is localized should be first
 performed. This operation is trivial for the case of an equidistant
 mesh. For non-equidistant mesh, an \emph{equidistant fine} mesh and its mapping to the
 actual mesh are first constructed in the spline initialization routine
 {\tt SET\_SPLINE} and used to localize the particles in the routine
 {\tt LOCINTV}.
 
 \section{Performances}
 From the considerations above, using BSPLINES to perform the charge
 assignment and field interpolation in 2D and 3D particle codes might
 result in large overheads because of the large number of calls to the
 routines {\tt BASFUN} to compute the splines or {\tt GETGRAD} to perform the field
 interpolation at a \emph{single}  particle position. In the following,
 the performances the 2D linearized gyrokinetic code GYGLES which has
 been adapted to use BSPLINES are analyzed. Vectorization by grouping
 the particles for both charge assignment and field interpolation is then
 proposed as a way to speed up these two operations when using
 BSPLINES.
 
 \subsection{Scalar performances}
 Optimization of the scalar versions of {\tt BASFUN} and {\tt GETGRAD}
 (when these routines are called with a \emph{single} particle) is
 performed essentially by
 \begin{itemize}
 \item Minimizing the flop counts and reducing redundant operations.
 \item Unrolling small loops, for example the loop over the $p+1$
   splines that are non-zero at a given position, for small $p$.
 \item Define all routines called by {\tt BASFUN} and {\tt  GETGRAD} as
   \emph{internal procedures}.
 \item Rearranging the memory layout of the multi-dimension array
   containing the PP coefficients of the spline.
 \end{itemize}
 
 The timings of the charge and current assignment (assign), the particle
 pusher (push) and the main time loop for a 5 time step run of GYGLES,
 on an Intel Xeon X5570 (hpcff.fz-juelich.de), using 4 MPI
 processes and Intel Fortran 12.1.2 are summarized in the following
 table
 
 \begin{center}
 \begin{tabular}{lrrrrr}
 \hline
            &  $T_0$(s)   &  $T_1$(s)   &  $T_2$(s)   &  $T_1/T_0$   &  $T_2/T_1$  \\
 \hline
  assign    &  1.454E+01  &  2.126E+01  &  2.259E+01  &       1.46  &       1.06  \\
  push      &  2.536E+01  &  3.080E+01  &  3.144E+01  &       1.21  &       1.02  \\
  mainloop  &  4.197E+01  &  5.955E+01  &  6.149E+01  &       1.42  &       1.03  \\
 \hline
 \end{tabular}
 \end{center}
 
 where $T_0$ is the time in seconds obtained with the original code
 while $T_1$ and $T_2$ are the times obtained with BSPLINES,
 respectively using an \emph{equidistant} and \emph{non-equidistant}
 radial mesh. In all the 3 runs, a quadratic splines were used.
 The small difference between \emph{equidistant} and
 \emph{non-equidistant} mesh comes mainly from the particle localization.
 
 The same run on an Intel Xeon E5-2680 (helios.iferc-csc.org), using
 the same Intel compiler (with AVX instructions) yields
 
 \begin{center}
 \begin{tabular}{lrrrrr}
 \hline
            &  $T_0$(s)   &  $T_1$(s)   &  $T_2$(s)   &  $T_1/T_0$   &  $T_2/T_1$  \\
 \hline
  assign    &  1.093E+01  &  1.987E+01  &  2.086E+01  &       1.82  &       1.05  \\
  push      &  2.385E+01  &  2.868E+01  &  2.994E+01  &       1.20  &       1.04  \\
  mainloop  &  3.656E+01  &  5.411E+01  &  5.598E+01  &       1.48  &       1.03  \\
 \hline
 \end{tabular}
 \end{center}
 
 
 \subsection{Speed up by vectorization}
 As found in the last section, using external routines from BSPLINES
 instead of \emph{hard coding} the spline computations
 results in a slowing down of 40--50\% for the main time
 loop. As will shown later, this problem could be solved by \emph{grouping} the
 particles and using the vectorized {\tt BASFUN} and {\tt GETGRAD}
 routines. Such particle grouping can be done for example, by replacing the usual
 particle loop by the following Fortran code fragment
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{verbatim}
   nset = npart/ngroup
   IF(MODULO(npt, ngroup).NE.0) nset = nset+1
   i2 = 0
   DO is=1,nset
     i1 = i2+1
     i2 = MIN(i2+ngroup,npart)
     CALL basfun(x(i1:i2), ...)
   END DO
 \end{verbatim}
 \nopagebreak\hrule
 \addvspace{\medskipamount}
 
 where {\tt npart} particles are partitioned into {\tt nset} groups,
 each containing at most {\tt ngroup} particles. Vectorization of the
 routines {\tt BASFUN} and {\tt GETGRAD} is achieved by moving whenever
 is possible the loop over the {\tt ngroup} particles into the
 innermost loop.
 
 The vectorization performances shown in Fig~.\ref{fig:basfun_hpcff} and
 Fig~.\ref{fig:getgrad_hpcff}, respectively for {\tt BASFUN} and {\tt
 GETGRAD} are obtained using version $12.1.2$ of Intel compiler on
 an Intel Xeon X5570 (hpcff.fz-juelich.de). With a speedup of at least
 2 for quadratic splines, the slowing down found previously in the
 scalar version could be likely compensated. The new AVX instructions
 present in the recent Intel Xeon E5-2680 (helios.iferc-csc.org) seems
 to improve somewhat the vectorization performance as shown in
 Fig~.\ref{fig:basfun_helios} and Fig~.\ref{fig:getgrad_helios}.
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{basfun_perf_hpcff}
   \caption{In this test, $10^5$ particles are distributed randomly on
     an equidistant mesh of 64 intervals. On each point, all the $p+1$
     splines are computed. The particle localization routine {\tt
     locintv} is included in the timing. In order to have a good
     statistics in the measurements, $1'000$ iterations of the particle loop are considered.}
   \label{fig:basfun_hpcff}
 \end{figure}
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{getgrad_perf_hpcff}
   \caption{In this test, $10^5$ particles are distributed randomly on
     an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where
     the coordinate $y$ is periodic. On each point, the function
     together with its gradient are computed, using the PP
     representation. The particle localization routine {\tt
     locintv} is included in the timing. In order to have a good
     statistics in the measurements, $100$ iterations of the particle loop are considered.}
   \label{fig:getgrad_hpcff}
 \end{figure}
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{basfun_perf_helios}
   \caption{In this test, $10^5$ particles are distributed randomly on
     an equidistant mesh of 64 intervals. On each point, all the $p+1$
     splines are computed. The particle localization routine {\tt
     locintv} is included in the timing. In order to have a good
     statistics in the measurements, $1'000$ iterations of the particle loop are considered.}
   \label{fig:basfun_helios}
 \end{figure}
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{getgrad_perf_helios}
   \caption{In this test, $10^5$ particles are distributed randomly on
     an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where
     the coordinate $y$ is periodic. On each point, the function
     together with its gradient are computed, using the PP
     representation. The particle localization routine {\tt
     locintv} is included in the timing. In order to have a good
     statistics in the measurements, $100$ iterations of the particle loop are considered.}
   \label{fig:getgrad_helios}
 \end{figure}
 
 \begin{thebibliography}{99}
 \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
 \end{thebibliography}
 
 \end{document}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 4169dde..4b28ada 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,77 +1,77 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 project(bsplines_tests)
 
 add_library(local_util STATIC
   pde1dp_mod.f90
   pde3d_mod.f90
   ppde3d_mod.f90
   ppde3d_pb_mod.f90
   tcdsmat_mod.f90
   meshdist.f90
   dismat.f90
   ibcmat.f90
   disrhs.f90
 )
 
 target_link_libraries(local_util PUBLIC bsplines)
 
 set(BS_TESTS
   driv1 driv2 driv3 driv4
   pde1d pde1dp pde1dp_cmpl
   pde2d pde2d_pb
   pde1dp_cmpl_dft
   pde3d ppde3d ppde3d_pb
   fit1d fit1dbc fit1dp
   fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y
   moments optim1 optim2 optim3
   tcdsmat tmassmat tbasfun tsparse1
   basfun_perf getgrad_perf gridval_perf
   test_kron
   )
 
 if(HAS_PARDISO)
   set(BS_TESTS ${BS_TESTS}
     pde1dp_cmpl_pardiso
     pde2d_pardiso
     pde2d_sym_pardiso
     pde2d_sym_pardiso_dft
     tsparse2
     )
 endif()
 
 if(HAS_MUMPS)
   set(BS_TESTS ${BS_TESTS}
     pde2d_mumps
     pde1dp_cmpl_mumps
     )
 endif()
 
 foreach(test ${BS_TESTS})
   add_executable(${test} ${test}.f90)
   target_link_libraries(${test} local_util ${LIBS} ${EXTRA_LIBS})
 endforeach()
diff --git a/examples/Makefile b/examples/Makefile
index 552d03f..0e45321 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,506 +1,506 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Emmanuel Lanti <emmanuel.lanti@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 PREFIX=$(HOME)
 XGRAFIX=/usr/local/xgrafix_1.2/src-double
 # FUTILS=/usr/local/crpp/futils
 # BSPLINES=/usr/local/crpp/bsplines
 # PPUTILS2=../pputils2
 # PPPACK=../pppack
 # SLATEC=/usr/local/slatec
 # FFTW=/usr/local/fftw-2.1.5-opt
 ARPACK=/usr/local/ARPACK
 LAPACK95=$(MKL)/../../../mkl/include/intel64/lp64 
 
 MPIF90 = mpif90
 F90 = ifort
 LD = $(MPIF90)
 
 debug = -g -traceback -check bounds -warn alignments -warn unused
 optim = -O3 -xHOST
 
 OPT=$(debug)
 #OPT=$(optim)
 
 F90FLAGS = $(OPT) -fPIC  -I../fft -I$(BSPLINES)/include -I$(FUTILS)/include \
 	   -I$(FFTW)/include -I$(MKL)/../../include/intel64/lp64
 LDFLAGS = $(OPT) -fPIC  -L$(FUTILS)/lib  -L$(BSPLINES)/lib -L${HDF5}/lib -L$(FFTW)/lib64 \
           -L$(SLATEC)/lib -L$(ARPACK)
 
 CC = cc
 CFLAGS = -O2
 
 LIBS =  -mkl=cluster -lbsplines -lpppack -lpppack -lpputils2 -lfutils -lfft -larpack \
 	-lfftw -lhdf5_fortran -lhdf5 -lz -lpthread
 
 LIBS1 = -mkl=cluster -lbsplines1 -lpppack -lfutils \
         -lhdf5_fortran -lhdf5 -lz -lsz -lpthread
 
 ifdef WSMP
 LDFLAGS += -L$(WSMP)
 LIBS += -lwsmp64
 LIBS1 += -lpwsmp64
 endif
 
 ifdef MUMPS
 F90FLAGS += -I$(MUMPS)/include -I$(LAPACK95)
 LDFLAGS += -L$(MUMPS)/lib -L$(PARMETIS)/lib
 LIBS += $(MUMPSLIBS)
 endif
 
 ifdef PETSC_DIR
 include ${PETSC_DIR}/conf/variables
 F90FLAGS += -I$(PETSC_DIR)/include  -I$(PETSC_DIR)/$(PETSC_ARCH)/include \
             -I$(MKL)/../../include
 LIBS += ${PETSC_FORTRAN_LIB} ${PETSC_KSP_LIB}
 endif
 
 PDE1DOBJS = pde1d.o 
 PDE2DOBJS = pde2d.o dismat.o disrhs.o ibcmat.o
 FIT1DOBJJS = fit1d.o
 
 .SUFFIXES:
 .SUFFIXES: .o .c .f90 .f
 
 .f90.o:
 	$(MPIF90) $(F90FLAGS) -c $<
 .f.o:
 	$(F90) $(F90FLAGS) -c $<
 
 all: examples tmat
 
 EX_FILES = driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde3d ppde3d ppde3d_pb \
           pde2d pde2d_pb fit1d fit1dbc \
           fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y \
 	  moments optim1 optim2 optim3 tmassmat tbasfun tcdsmat tsparse1 tsparse2 \
           pde2d_pardiso pde2d_sym_pardiso pde1dp_cmpl_pardiso pde1dp_cmpl_dft \
           pde2d_sym_pardiso_dft \
 	  pde1d_eig_csr pde1d_eig_gb pde1d_eig_ge
 
 ifdef WSMP
 EX_FILES += pde2d_wsmp pde2d_pwsmp pde2d_sym_wsmp  pde1dp_cmpl_wsmp
 endif
 
 ifdef MUMPS
 EX_FILES += pde2d_mumps pde1dp_cmpl_mumps pde1d_eig_zmumps
 endif
 
 ifdef PETSC_DIR
 EX_FILES += pde2d_petsc
 endif
 
 examples: $(EX_FILES)
 
 tmat:     tmatrix_gb tmatrix_pb tmatrix_zpb
 
 adv:	adv.o extra.o
 	$(LD) $(LDFLAGS) -L$(XGRAFIX) -o $@ $< extra.o $(LIBS) \
         -lfftw -lXGF -lXGC -lX11
 	cp -p $@ ../bin/
 
 driv1:	driv1.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 driv2:	driv2.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 driv3:	driv3.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 driv4:	driv4.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde1d:	$(PDE1DOBJS)
 	$(LD) $(LDFLAGS) -o $@ $(PDE1DOBJS) $(LIBS)
 	cp -p $@ ../bin/
 
 pde1d_eig_csr:	pde1d_eig_csr.o
 	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_csr.o $(LIBS) -lpputils2
 	cp -p $@ ../bin/
 
 pde1d_eig_zcsr:	pde1d_eig_zcsr.o
 	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_zcsr.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde1d_eig_zmumps: pde1d_eig_zmumps.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS) -lpputils2
 	cp -p $@ ../bin/
 
 pde1d_eig_zcsr:	pde1d_eig_zcsr.o
 	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_zcsr.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde1d_eig_gb:	pde1d_eig_gb.o
 	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_gb.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde1d_eig_ge:	pde1d_eig_ge.o
 	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_ge.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde1dp:	pde1dp.o pde1dp_mod.o
 	$(LD) $(LDFLAGS) -o $@ pde1dp.o pde1dp_mod.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde1dp_cmpl:	pde1dp_cmpl.o
 	$(LD) $(LDFLAGS) -o $@ pde1dp_cmpl.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde1dp_KA:	pde1dp_KA.o pde1dp_mod_KA.o
 	$(LD) $(LDFLAGS) -o $@ pde1dp_KA.o pde1dp_mod_KA.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d:	$(PDE2DOBJS)
 	$(LD) $(LDFLAGS) -o $@ $(PDE2DOBJS) $(LIBS)
 	cp -p $@ ../bin/
 
 pde3d:	pde3d.o pde3d_mod.o
 	$(LD) $(LDFLAGS) -o $@ pde3d.o pde3d_mod.o $(LIBS)
 	cp -p $@ ../bin/
 
 ppde3d:	ppde3d.o ppde3d_mod.o
 	$(LD) $(LDFLAGS) -o $@ ppde3d.o ppde3d_mod.o $(LIBS)
 	cp -p $@ ../bin/
 
 ppde3d_pb:	ppde3d_pb.o ppde3d_pb_mod.o
 	$(LD) $(LDFLAGS) -o $@ ppde3d_pb.o ppde3d_pb_mod.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_pb:	pde2d_pb.o
 	$(LD) $(LDFLAGS) -o $@ pde2d_pb.o $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_nh:	pde2d_nh.o
 	$(LD) $(LDFLAGS) -o $@ pde2d_nh.o $(LIBS)
 	cp -p $@ ../bin/
 
 tcdsmat: tcdsmat.o tcdsmat_mod.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ tcdsmat.o tcdsmat_mod.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 tmatrix_pb:	tmatrix_pb.o 
 	$(LD) $(LDFLAGS) -o $@ tmatrix_pb.o $(LIBS)
 	cp -p $@ ../bin/
 
 tmatrix_zpb:	tmatrix_zpb.o
 	$(LD) $(LDFLAGS) -o $@ tmatrix_zpb.o $(LIBS)
 	cp -p $@ ../bin/
 
 tmatrix_gb:	tmatrix_gb.o
 	$(LD) $(LDFLAGS) -o $@ tmatrix_gb.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit1d:	$(FIT1DOBJJS)
 	$(LD) $(LDFLAGS) -o $@ $(FIT1DOBJJS) $(LIBS)
 	cp -p $@ ../bin/
 
 fit1d_cmpl: fit1d_cmpl.o
 	$(LD) $(LDFLAGS) -o $@ fit1d_cmpl.o $(LIBS)
 	cp -p $@ ../bin/
 
 gyro: gyro.o
 	$(LD) $(LDFLAGS) -o $@ gyro.o -lslatec $(LIBS)
 	cp -p $@ ../bin/
 
 fit1dbc:	fit1dbc.o
 	$(LD) $(LDFLAGS) -o $@ fit1dbc.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit1dp:	fit1dp.o
 	$(LD) $(LDFLAGS) -o $@ fit1dp.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit2d:	fit2d.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ fit2d.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit2d1d:	fit2d1d.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ fit2d1d.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit2d_cmpl:	fit2d_cmpl.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ fit2d_cmpl.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit2dbc:	fit2dbc.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ fit2dbc.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit2dbc_x:	fit2dbc_x.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ fit2dbc_x.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 fit2dbc_y:	fit2dbc_y.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ fit2dbc_y.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 moments:	moments.o
 	$(LD) $(LDFLAGS) -o $@ moments.o $(LIBS)
 	cp -p $@ ../bin/
 
 mesh:	mesh.o meshdist.o
 	$(LD) $(LDFLAGS) -o $@ mesh.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 optim1:	optim1.o
 	$(LD) $(LDFLAGS) -o $@ optim1.o $(LIBS)
 	cp -p $@ ../bin/
 
 optim2:	optim2.o
 	$(LD) $(LDFLAGS) -o $@ optim2.o $(LIBS)
 	cp -p $@ ../bin/
 
 optim3:	optim3.o
 	$(LD) $(LDFLAGS) -o $@ optim3.o $(LIBS)
 	cp -p $@ ../bin/
 
 tmassmat:	tmassmat.o
 	$(LD) $(LDFLAGS) -o $@ tmassmat.o $(LIBS)
 	cp -p $@ ../bin/
 
 tbasfun:	tbasfun.o
 	$(LD) $(LDFLAGS) -o $@ tbasfun.o $(LIBS)
 	cp -p $@ ../bin/
 
 basfun_perf:	basfun_perf.o
 	$(LD) $(LDFLAGS) -o $@ basfun_perf.o $(LIBS)
 	cp -p $@ ../bin/
 
 gridval_perf:	gridval_perf.o
 	$(LD) $(LDFLAGS) -o $@ gridval_perf.o $(LIBS)
 	cp -p $@ ../bin/
 
 getgrad_perf:	getgrad_perf.o
 	$(LD) $(LDFLAGS) -o $@ getgrad_perf.o $(LIBS)
 	cp -p $@ ../bin/
 
 basfun_perf1:	basfun_perf1.o
 	$(LD) $(LDFLAGS) -o $@ basfun_perf1.o $(LIBS)
 	cp -p $@ ../bin/
 
 tlocintv:	tlocintv.o  meshdist.o
 	$(LD) $(LDFLAGS) -o $@ tlocintv.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 tgausleg:	tgausleg.o  meshdist.o
 	$(LD) $(LDFLAGS) -o $@ tgausleg.o meshdist.o $(LIBS)
 	cp -p $@ ../bin/
 
 poisson:	poisson.o 
 	$(LD) $(LDFLAGS) -o $@ poisson.o  $(LIBS)
 	cp -p $@ ../bin/
 
 poisson_mumps:	poisson_mumps.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 poisson_petsc:	poisson_petsc.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 tsparse1: tsparse1.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 tsparse2: tsparse2.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_pardiso: pde2d_pardiso.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_mumps: pde2d_mumps.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)	
 	cp -p $@ ../bin/
 
 pde2d_petsc: pde2d_petsc.o $(PPUTILS2)/pputils2.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)	
 	cp -p $@ ../bin/
 
 pde2d_sym_pardiso: pde2d_sym_pardiso.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_sym_pardiso_dft: pde2d_sym_pardiso_dft.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_sym_wsmp_dft: pde2d_sym_wsmp_dft.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 zssmp_ex1: zssmp_ex1.o
 	$(LD) $(LDFLAGS) -o $@ $<  $(LIBS)
 	cp -p $@ ../bin/
 
 zpardiso_ex1: zpardiso_ex1.o
 	$(LD) $(LDFLAGS) -o $@ $<  $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_sym_wsmp: pde2d_sym_wsmp.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_wsmp: pde2d_wsmp.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde2d_pwsmp: pde2d_pwsmp.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS1)
 	cp -p $@ ../bin/
 
 pde1dp_cmpl_pardiso:	pde1dp_cmpl_pardiso.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde1dp_cmpl_mumps:	pde1dp_cmpl_mumps.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde1dp_cmpl_dft:	pde1dp_cmpl_dft.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 pde1dp_cmpl_wsmp:	pde1dp_cmpl_wsmp.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 tspline:	tspline.o
 	$(LD) $(LDFLAGS) -o $@ tspline.o $(LIBS)
 	cp -p $@ ../bin/
 
 tpsum_mat: tpsum_mat.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 tp2p_mat: tp2p_mat.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 	cp -p $@ ../bin/
 
 test_pwsmp:	test_pwsmp.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS1)
 	cp -p $@ ../bin/
 
 driv1.o:
 driv2.o:
 driv3.o:
 driv4.o:
 pde1d.o:
 pde1dp.o: pde1dp_mod.o
 pde1dp_cmpl.o:
 pde1dp_mod.o:
 pde1dp_KA.o: pde1dp_mod_KA.o
 pde1dp_mod_KA.o:
 pde2d.o:
 pde3d.o: pde3d_mod.o
 pde3d_mod.o:
 ppde3d.o: ppde3d_mod.o
 ppde3d_mod.o:
 ppde3d_pb.o: ppde3d_pb_mod.o
 ppde3d_pb_mod.o:
 pde2d_pb.o:
 pde2d_nh.o:
 tcdsmat.o: tcdsmat_mod.o
 tcdsmat_mod.o:
 fit1d.o:
 fit1d_cmpl.o:
 gyro.o: 
 fit1dbc.o:
 fit1dp.o:
 fit2d.o:
 fit2d_cmpl.o:
 fit2dbc.o:
 dismat.o:
 disrhs.o:
 ibcmat.o:
 adv.o:
 tmatrix_pb.o:
 tmatrix_zpb.o:
 tmatrix_gb.o:
 moments.o:
 mesh.o:
 optim1.o:
 optim2.o:
 optim3.o:
 tmassmat.o:
 tbasfun.o:
 basfun_perf.o:
 basfun_perf1.o:
 tlocintv.o:
 tgausleg.o:
 poisson.o:
 poisson_mumps.o:
 tsparse1.o:
 tsparse2.o:
 pde2d_pardiso.o:
 pde2d_mumps.o:
 pde2d_petsc.o:
 pde2d_sym_pardiso.o:
 pde2d_sym_pardiso_dft.o:
 pde2d_sym_wsmp_dft.o:
 pde2d_wsmp.o:
 pde2d_pwsmp.o:
 pde2d_sym_wsmp.o:
 pde1dp_cmpl_pardiso.o:
 pde1dp_cmpl_mumps.o:
 pde1dp_cmpl_dft.o:
 pde1dp_cmpl_wsmp.o:
 tpsum_mat.o:
 tp2p_mat.o:
 poisson_petsc.o:
 
 tags:
 	etags *.f *.f90 ../src/*.f90 $(PPPACK)/*.f90
 
 clean:
 	rm -f *.o *.mod *~ a.out
 
 distclean: clean
 # $(MAKE) -C ../src distclean
 # $(MAKE) -C ../fft distclean
 # $(MAKE) -C $(PPUTILS2) distclean
 	rm -f *.a *.mod pde1d pde1dp pde1dp_cmpl pde1dp_KA driv1 driv2 driv3 driv4 \
               tmatrix_pb tmatrix_gb tmatrix_zpb \
               pde2d pde2d_pb pde2d_nh pde3d ppde3d ppde3d_pb\
               fit1d fit1d_cmpl gyro fit1dbc fit1dp \
               fit2d  fit2d1d fit2d_cmpl fit2dbc \
               fit2dbc_x fit2dbc_y adv moments tcdsmat poisson poisson_mumps\
               mesh optim1 optim2 optim3 tmassmat tbasfun \
               basfun_perf gridval_perf getgrad_perf tlocintv \
 	      tsparse1 tsparse2 \
               pde2d_pardiso pde2d_sym_pardiso pde2d_wsmp pde2d_sym_wsmp \
               pde1dp_cmpl_dft pde1dp_cmpl_wsmp pde1d_eig_csr pde1d_eig_pb pde1d_eig_ge \
               pde2d_sym_pardiso_dft pde1dp_cmpl_pardiso \
 	      pde2d_mumps pde1dp_cmpl_mumps tpsum_mat tp2p_mat pde2d_sym_wsmp_dft \
 	      poisson_petsc pde2d_petsc \
               pde1d_eig_csr pde1d_eig_zcsr pde1d_eig_zmumps pde1d_eig_gb pde1d_eig.ge \
               ../bin/*
 
diff --git a/examples/adv.f90 b/examples/adv.f90
index 8c40a29..c3fc591 100644
--- a/examples/adv.f90
+++ b/examples/adv.f90
@@ -1,320 +1,320 @@
 !>
 !> @file adv.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   1d Periodic Advection: F(x,t) = F(x-u*dt,t-dt)
 !   using module bsplines
 !
   USE bsplines
 !
   IMPLICIT NONE
   INCLUDE 'fftw_f77.h'
   TYPE(spline1d) :: spl
   INTEGER, PARAMETER :: nhistmx=1000, ncomb=4
   INTEGER :: nx, nidbas, dim
   INTEGER :: nstep, nskipt, nhist, mhist
   DOUBLE PRECISION :: a, b, dt, u, w,  coefx(5)
   DOUBLE PRECISION, DIMENSION(0:nhistmx) :: thist, tmass, tfmin, tfmax, ermass
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), xshft(:)
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:), ferr(:), kx(:), ampl(:)
   DOUBLE COMPLEX, ALLOCATABLE :: cfgrid(:), ffft(:)
   INTEGER(8) :: forw
   DOUBLE PRECISION :: time
   INTEGER :: i
   NAMELIST /newrun/ nx, nidbas, a, b, dt, u, w,  coefx
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 100    ! Number of intevals in x
   a = 0.0     ! Left boundary of interval
   b = 100.0   ! Right boundary of interval
   dt = 0.1    ! Time step
   u = 1.0     ! Velocity
   w = 2.0     ! Shape of initial function
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !===========================================================================
 !              2.0 Define initial conditions
 !
 !   Set up mesh
 !
   ALLOCATE(xgrid(0:nx), xshft(0:nx), fgrid(0:nx), ferr(0:nx))
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
 !
 !   Set up the spline interpolation
 !
   CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
   CALL get_dim(spl, dim)
   WRITE(*,'(a,i6)') 'dimension of splines', dim
   ALLOCATE(coefs(dim))
 !
 !   Initial conditions
 !
   time = 0.0d0
   nstep = 0
   nskipt = 1
   DO i=0,nx-1
      fgrid(i) = finit(xgrid(i))
   END DO
   fgrid(nx) = fgrid(0)
   ferr = 0.0
   CALL get_splcoef(spl, fgrid, coefs)
   WRITE(*,'(a/(10f8.3))') 'knots', spl%knots
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
   WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
   WRITE(*,'(a/(10f8.3))') 'coefs', coefs
 !
 !   Set up FFT
 !
   ALLOCATE(kx(-nx/2+1:nx/2), ampl(-nx/2+1:nx/2))
   ALLOCATE(cfgrid(0:nx-1), ffft(0:nx-1))
   DO i=-nx/2+1,nx/2
      kx(i) = i
   END DO
   cfgrid(0:nx-1) = fgrid(0:nx-1)
   CALL fftw_f77_create_plan(forw, nx, FFTW_FORWARD, FFTW_ESTIMATE)
   CALL fftw_f77_one(forw, cfgrid, ffft)
   ampl(0:nx/2) = ABS(ffft(0:nx/2))
   ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1))
 !
 !   Set up history arrays
 !
   nhist = 0
   thist(nhist) = 0.0
 !!$  tmass(nhist) = SUM(coefs(1:nx))
   tmass(nhist) = SUM(fgrid(1:nx))
   tfmin(nhist) = MINVAL(fgrid)
   tfmax(nhist) = MAXVAL(fgrid)
   ermass(nhist) = tmass(nhist)-tmass(0)
   WRITE(*,'(a,(10f8.3))') 'Initial mass', tmass(nhist)
 !
 !  Initialize Xgrafix
 !
   CALL xginit(3,'ADV','adv',' ',' ',' ',' ',time)
 !
   CALL xgset2d('linlin', 'X', 'F', 'open', 220, 60, 1.d0, 1.d0, &
        &       .FALSE., .FALSE., xgrid(0), xgrid(nx), -0.2d0, 1.2d0)
   CALL xgcurve(xgrid, fgrid, nx+1, 1)
 !
   CALL xgset2d('linlin', 'X', 'FERR', 'open', 620, 60, 1.d0, 1.d0, &
        &       .FALSE., .FALSE., xgrid(0), xgrid(nx), -1.d0, 1.d0)
   CALL xgcurve(xgrid, ferr, nx+1, 1)
 !
   CALL xgset2d('linlin', 'Time', 'Error Mass', 'open', 820, 400, 1.d0, 1.d0, &
        &       .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0)
   CALL xgcurve(thist, ermass, nhist, 1)
 !
   CALL xgset2d('linlin', 'Time', 'Min/Max', 'open', 420, 400, 1.d0, 1.d0, &
        &       .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0)
   CALL xgcurve(thist, tfmin, nhist, 1)
   CALL xgcurve(thist, tfmax, nhist, 2)
 !
   CALL xgset2d('linlin', 'kx', 'Amplitude of F', 'open', 20, 400, 1.d0, 1.d0, &
        &       .FALSE., .FALSE., kx(-nx/2+1), kx(nx/2), &
        &        0.0d0, MAXVAL(ampl))
   CALL xgcurve(kx, ampl, nx, 1)
 !
   CALL xgupdate
 !===========================================================================
 !              3.0 Time loop
 !
   nskipt = 1
   DO
      nstep = nstep+1
      time = time+dt
      CALL xgevent
 !
 !   Shift x
 !
      CALL get_splcoef(spl, fgrid, coefs)
      xshft(0:nx) = xgrid(0:nx) - u*dt
      CALL gridval(spl, xshft, fgrid, 0, coefs)
 !
      xshft(0:nx) = xgrid(0:nx) - u*time
      DO i =0,nx
         ferr(i) = fgrid(i) - finit(xshft(i))
      END DO
 !
      cfgrid(0:nx-1) = fgrid(0:nx-1)
      CALL fftw_f77_one(forw, cfgrid, ffft)
      ampl(0:nx/2) = ABS(ffft(0:nx/2))
      ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1))
 !
 !   Diagnostics
 !
      IF( MOD(nstep,nskipt) .EQ. 0 ) THEN
         nhist = nhist+1
         IF( nhist .GT. nhistmx ) THEN
            nskipt = ncomb*nskipt
            mhist = nhist-1
            CALL packarr(mhist, thist, ncomb, nhist)
            CALL packarr(mhist, tmass, ncomb, nhist)
            CALL packarr(mhist, tfmin, ncomb, nhist)
            CALL packarr(mhist, tfmax, ncomb, nhist)
            CALL packarr(mhist, ermass, ncomb, nhist)
         END IF
         thist(nhist) = time
         tmass(nhist) = SUM(fgrid(1:nx))
 !!$        tmass(nhist) = SUM(coefs(1:nx))
         tfmin(nhist) = MINVAL(fgrid)
         tfmax(nhist) = MAXVAL(fgrid)
         ermass(nhist) = (tmass(nhist)-tmass(0))/tmass(0)
      END IF
 !     
      CALL xgupdate
   END DO
 !===========================================================================
 !              9.0 Prologue
 !
   CALL fftw_f77_destroy_plan(forw)
   DEALLOCATE(xgrid, fgrid, xshft, coefs)
 CONTAINS
   DOUBLE PRECISION FUNCTION finit(xx)
 !
 !   A "box" function
 !
     DOUBLE PRECISION, INTENT(in) :: xx
     DOUBLE PRECISION :: xl, xr, xl0, xr0, h, x, xlen
     INTEGER :: kl, kr, klflag, krflag
 !
     xlen = b-a
     x = a + MODULO(xx-a+xlen, xlen)
 !
     xl = 0.375*(b-a)
     xr = 0.624*(b-a)
     CALL interv(xgrid, nx+1, xl, kl, klflag)
     CALL interv(xgrid, nx+1, xr, kr, krflag)
     xl0 = xl + w*(xgrid(kl)-xgrid(kl-1))
     xr0 = xr - w*(xgrid(kr)-xgrid(kr-1))
     CALL interv(xgrid, nx+1, xl0, kl, klflag)
     CALL interv(xgrid, nx+1, xr0, kr, krflag)
     IF( x .LT. xl0 ) THEN
        h = xgrid(kl)-xgrid(kl-1)
        finit = EXP(-((x-xl0)/(w*h))**2)
     ELSE IF( x .GT. xr0) THEN
        h = xgrid(kr)-xgrid(kr-1)
        finit = EXP(-((x-xr0)/(w*h))**2)
     ELSE
        finit = 1.0d0
     END IF
   END FUNCTION finit
 
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 
 SUBROUTINE packarr(n, arr, skip, nhist)
   IMPLICIT NONE
   INTEGER :: n, skip, i, ii, nhist
   DOUBLE PRECISION :: arr(0:n)
   ii = 0
   DO i=0,n,skip
      arr(ii) = arr(i)
      ii=ii+1
   END DO
   nhist = ii
 END SUBROUTINE packarr
 !+++
 
 SUBROUTINE dump(filename, l)
 !
 !    Is invoked when button "Dump" is pressed.
 !
   IMPLICIT NONE
   CHARACTER(len=*) ::  filename
   INTEGER          :: l
   WRITE(*,'(a,a,a1)') 'Dumpfile = "', filename(1:l),'"'
 END SUBROUTINE dump
 
 SUBROUTINE quit()
 !
 !   Is invoked when button "Quit" is pressed
 !
   IMPLICIT NONE
   PRINT*, 'Program terminated ...'
 END SUBROUTINE quit
diff --git a/examples/basfun_perf.f90 b/examples/basfun_perf.f90
index 7cab71f..69c54d3 100644
--- a/examples/basfun_perf.f90
+++ b/examples/basfun_perf.f90
@@ -1,170 +1,170 @@
 !>
 !> @file basfun_perf.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Performance of scalar and vector versions of def_basfun
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER :: nx, nidbas, nrank, npt=10, jdermx
   DOUBLE PRECISION :: dx
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :)
   INTEGER :: left, i, i1, i2
   INTEGER :: ngroup, nset, nremain
   TYPE(spline1d) :: splx
   DOUBLE PRECISION :: t0, t1, seconds
   DOUBLE PRECISION :: t_loop, t_locintv1, t_basfun1, t_locintv, t_basfun
   INTEGER :: its, nits
   INTEGER, ALLOCATABLE :: vleft(:)
   DOUBLE PRECISION, ALLOCATABLE :: vfun(:,:,:)
   LOGICAL :: nlperiod
 !
   NAMELIST /newrun/ nx, nidbas, npt, nits, ngroup, jdermx, nlperiod
 !
 !===============================================================================
 !
 !   1D grid
 !
   nx = 10
   nidbas = 3
   npt = 1000000
   nits = 100
   ngroup = 10
   jdermx = 0
   nlperiod = .FALSE.
   READ(*,newrun)
   WRITE(*,newrun)
 
   ALLOCATE(xgrid(0:nx))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, 4, xgrid, splx, period=nlperiod)
   nrank = splx%dim
   WRITE(*,'(a, i5)') 'nrank =', nrank
   WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
 !
   ALLOCATE(xpt(npt))
   ALLOCATE(fun(0:nidbas,0:jdermx))  ! Values and first derivatives of all Splines
   CALL RANDOM_NUMBER(xpt)
 !===============================================================================
 !                            1.0 Scalar version
 !
 !    loop
   t0 = seconds()
   DO its=1,nits
      DO i=1,npt
      END DO
   END DO
   t_loop = (seconds()-t0)/REAL(nits*npt,8)
 !
 !   locintv
   t0 = seconds()
   DO its=1,nits
      DO i=1,npt
         CALL locintv(splx, xpt(i), left)
      END DO
   END DO
   t_locintv1 = (seconds()-t0)/REAL(nits*npt,8)
 !
 !   def_basfun
   t0 = seconds()
   DO its=1,nits
      DO i=1,npt
         CALL locintv(splx, xpt(i), left)
         CALL basfun(xpt(i), splx, fun, left+1)
      END DO
   END DO
   t_basfun1 = (seconds()-t0)/REAL(nits*npt,8)
 !
   WRITE(*,'(6x,3a12)') 'loop', 'locintv', 'basfun'
   WRITE(*,'(6x,8(1pe12.3))') t_loop, t_locintv1, t_basfun1
 !===============================================================================
 !                            2.0 Vector version
 !
   ngroup = 1
   DO WHILE (ngroup .LT. npt/2)
      ALLOCATE(vleft(ngroup))
      ALLOCATE(vfun(0:nidbas, 0:jdermx, ngroup))
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
 !
 !    loop
      t0 = seconds()
      DO its=1,nits
         i2 = 0
         DO i=1,nset
            i1 = i2+1
            i2 = MIN(i2+ngroup,npt)
         END DO
      END DO
      t_loop = (seconds()-t0)/REAL(nits*nset,8)
 !
 !   locintv
      t0 = seconds()
      DO its=1,nits
         i2 = 0
         DO i=1,nset
            i1 = i2+1
            i2 = MIN(i2+ngroup,npt)
            CALL locintv(splx, xpt(i1:i2), vleft)
         END DO
      END DO
      t_locintv = (seconds()-t0)/REAL(nits*npt,8)
 !
 !   basfun
      t0 = seconds()
      DO its=1,nits
         i2 = 0
         DO i=1,nset
            i1 = i2+1
            i2 = MIN(i2+ngroup,npt)
            CALL locintv(splx, xpt(i1:i2), vleft)
            CALL basfun(xpt(i1:i2), splx, vfun, vleft+1)
         END DO
      END DO
      t_basfun = (seconds()-t0)/REAL(nits*npt,8)
 !
      WRITE(*,'(i6,8(1pe12.3))') ngroup, t_loop, t_locintv, t_basfun, &
           &      t_locintv1/t_locintv, t_basfun1/t_basfun
      DEALLOCATE(vleft)
      DEALLOCATE(vfun)
      ngroup = ngroup*2
   END DO
 !===============================================================================
 !
 !   Clean up
 !
   CALL destroy_sp(splx)
   DEALLOCATE(xgrid)
   DEALLOCATE(xpt)
   DEALLOCATE(fun)
 END PROGRAM main
diff --git a/examples/dirichlet/Makefile b/examples/dirichlet/Makefile
index 4f0edc9..0b6276f 100644
--- a/examples/dirichlet/Makefile
+++ b/examples/dirichlet/Makefile
@@ -1,62 +1,62 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Emmanuel Lanti <emmanuel.lanti@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 #BSPLINES = $(HOME)/bsplines
 #FUTILS   = $(HOME)/futils
 
 F90 = mpif90
 LD = $(F90)
 
 debug = -g -traceback -check bounds -warn alignments -warn nounused
 optim = -O3 -xHOST
 
 F90FLAGS = $(OPT) -I$(BSPLINES)/include -I$(FUTILS)/include
 LDFLAGS = $(OPT) -L$(BSPLINES)/lib -L$(FUTILS)/lib -L$(HDF5)/lib
 LIBS = -lbsplines -lpppack -lfutils -lhdf5_fortran -lhdf5 -lz
 
 LDFLAGS += -g -L$(MKL)
 LIBS += -lmkl_intel_lp64  -lmkl_sequential -lmkl_core -lpthread
 
 OPT=$(debug)
 #OPT=$(optim)
 
 .SUFFIXES:
 .SUFFIXES: .o .c .f90
 .f90.o:
 	$(F90) $(F90FLAGS) -c $<
 
 all:	poisson
 
 poisson: poisson.o poisson_mod.o
 	$(LD) $(LDFLAGS) -o $@ $^ $(LIBS)
 
 poisson.o: poisson_mod.o
 
 clean:
 	rm -f *.o *.mod
 
 distclean: clean
 	rm -f poisson a.out *~ *.h5 *.fig *.eps *.pdf
diff --git a/examples/dirichlet/poisson.f90 b/examples/dirichlet/poisson.f90
index fa96f4a..f6048e7 100644
--- a/examples/dirichlet/poisson.f90
+++ b/examples/dirichlet/poisson.f90
@@ -1,383 +1,383 @@
 !>
 !> @file poisson.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 2d Poisson in cylibdrical coordinates, using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 0, with f(x=1,y) = cos(my)
 !    exact solution: f(x,y) = r^m cos(my)
 !
   USE bsplines
   USE matrix
   USE conmat_mod
   USE poisson_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, dirmeth, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   TYPE(spline2d) :: splxy
   TYPE(pbmat) :: mat
 !
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2, shifty
   DOUBLE PRECISION :: err00, err10, err01
   INTEGER :: nits=500
 !
   CHARACTER(len=128) :: file='poisson.h5'
   INTEGER :: fid
 !
 !  Dirichlet BC properties encapsulated in a derived datatype
 !
   TYPE(dirich) :: right_bc
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, dirmeth, &
        &            coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   dirmeth = 1         ! 1: use spline interpolation in Dirichlet BC
                       ! 2: residual minimization in Dirichlet BC
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
 !
   CALL init(ku, nrank, nterms, mat)
   t0 = seconds()
   CALL conmat(splxy, mat, coefeq)
   tmat = seconds() - t0
   ALLOCATE(arr(nrank))
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   Store some usefull parameters in right_bc
 !
     right_bc%meth = dirmeth
     right_bc%mbess = mbess
     right_bc%n1 = nx
     right_bc%n2 = ny
     right_bc%nidbas1 = nidbas(1)
     right_bc%nidbas2 = nidbas(2)
 !
 !   BC on Matrix and RHS
 !
   CALL ibcmat(mat, right_bc)
   CALL ibcrhs(rhs, ygrid, right_bc)
 
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, rhs, sol)
 !
 !   Backtransform of solution
 !
   sol(1:ny-1) = sol(ny)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
 !
   tsolv = seconds() - t0
   gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
 !===========================================================================
 !              4.0 Check the solution
 !
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = f_exact(mbess, xgrid(i), ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM at first call to gridval
   IF(nlppform) THEN
      CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
   END IF
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
 !
   errsol = solana - solcal
   WRITE(*,'(a/(8(1pe12.3)))') 'Error at the boundary r = 1', errsol(nx,:)
   err00 = err2_norm(splxy, jder, mbess, f_exact)
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = 0.0d0
         ELSE
            solana(i,j) = fx_exact(mbess,xgrid(i),ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
 !
   errsol = solana - solcal
   err10 = err2_norm(splxy, jder, mbess, fx_exact)
 !
   DO i=0,nx
      DO j=0,ny
          IF( mbess .EQ. 0 ) THEN
            solana(i,j) = 0.0d0
         ELSE
            solana(i,j) = fy_exact(mbess, xgrid(i), ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
 !
   errsol = solana - solcal
   err01 = err2_norm(splxy, jder, mbess, fy_exact)
 !
   WRITE(*,'(/a,3(1pe12.3))') 'Discretization errors', err00, err10, err01
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
   WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
   CALL closef(fid)
 !
 !===========================================================================
 !
 CONTAINS
 !--
   DOUBLE PRECISION FUNCTION f_exact(m,x,y)
     INTEGER,INTENT(in) :: m
     DOUBLE PRECISION, INTENT(in) :: x, y
     f_exact = (x**m)*COS(m*y)
   END FUNCTION f_exact
 !--
   DOUBLE PRECISION FUNCTION fx_exact(m,x,y)
     INTEGER,INTENT(in) :: m
     DOUBLE PRECISION, INTENT(in) :: x, y
     fx_exact = m*(x**(m-1))*COS(m*y)
   END FUNCTION fx_exact
 !--
   DOUBLE PRECISION FUNCTION fy_exact(m,x,y)
     INTEGER,INTENT(in) :: m
     DOUBLE PRECISION, INTENT(in) :: x, y
     fy_exact = -m*(x**m)*SIN(m*y)
   END FUNCTION fy_exact
 !--
   SUBROUTINE prntmat(str, a)
     DOUBLE PRECISION, DIMENSION(:,:) :: a
     CHARACTER(len=*) :: str
     INTEGER :: i
     WRITE(*,'(a)') TRIM(str)
     DO i=1,SIZE(a,1)
        WRITE(*,'(10f8.1)') a(i,:)
     END DO
   END SUBROUTINE prntmat
 !--
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !--
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
 !--
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 
diff --git a/examples/dirichlet/poisson.m b/examples/dirichlet/poisson.m
index 3a56ac8..bb5c2e3 100644
--- a/examples/dirichlet/poisson.m
+++ b/examples/dirichlet/poisson.m
@@ -1,88 +1,88 @@
 %
 % @file poisson.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='poisson.h5';
 m=3;
 %
 %     Get data from data sets
 %
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 solr=hdf5read(file,'/derivx')';
 solt=hdf5read(file,'/derivy')';
 
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 solx = cos(T).*solr - sin(T)./R.*solt;
 soly = sin(T).*solr + cos(T)./R.*solt;
 
 figure
 subplot(221)
 pcolor(double(r),double(t),double(sol));
 shading interp
 hold on, quiver(r,t,solr,solt)
 xlabel('r'); ylabel('\theta')
 title('R-THETA plane')
 colorbar
 
 subplot(222)
 pcolor(double(x),double(y),double(sol))
 shading interp
 hold on, quiver(x,y,solx,soly)
 hold off, axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(double(x),double(y),double(sol))
 xlabel('X'); ylabel('Y');
 title('Solutions')
 
 subplot(224)
 surfc(double(x),double(y),double(err))
 xlabel('X'); ylabel('Y');
 title('Errors')
 
 figure
 subplot(211)
 plot(r,sol(1,:),'o',r,solexact(1,:))
 xlabel('r')
 ylabel('Solutions at \theta=0')
 grid on
 subplot(212)
 tt=0:0.01:2*pi;
 plot(t,sol(:,end),'o',tt,cos(m.*tt))
 xlabel('\theta')
 ylabel('Solutions at r=1')
 grid on
 
 
 
 
diff --git a/examples/dirichlet/poisson_mod.f90 b/examples/dirichlet/poisson_mod.f90
index ae234c5..96e9417 100644
--- a/examples/dirichlet/poisson_mod.f90
+++ b/examples/dirichlet/poisson_mod.f90
@@ -1,354 +1,354 @@
 !>
 !> @file poisson_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE poisson_mod
   IMPLICIT NONE
 !
 !  Dirichlet BC encapsulated in a derived datatype
 !
   TYPE dirich
      INTEGER :: meth, mbess, n1, n2, nidbas1, nidbas2
      INTEGER :: i0, i1
      DOUBLE PRECISION, POINTER :: amat(:,:) => NULL()
      DOUBLE PRECISION, POINTER :: g(:) => NULL()
   END TYPE dirich
 !
 CONTAINS
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     USE bsplines
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
 !
 !   The RHS is 0 
 !
     rhs(:) = 0.0d0
   END SUBROUTINE disrhs
 !
   SUBROUTINE ibcmat(mat, bc)
 !
 !   Apply BC on matrix
 !
     USE matrix
     TYPE(pbmat), INTENT(inout) :: mat
     TYPE(dirich) :: bc
     INTEGER :: ny
     INTEGER :: kl, ku, nrank, i, j, k
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
     INTEGER :: i0, i1, ii
 !===========================================================================
 !              1.0 Prologue
 !
     ku = mat%ku
     kl = ku
     nrank = mat%rank
     ny = bc%n2
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     ALLOCATE(zsum(nrank))
     ALLOCATE(arr(nrank))
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        DO j=1,ny+ku
           zsum(j) = zsum(j) + arr(j)
        END DO
     END DO
 !
     zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     CALL putrow(mat, ny, zsum)
     DEALLOCATE(zsum)
 !
 !   The away operator
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
     DEALLOCATE(arr)
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
 !!$    i0 = nrank - ku
 !!$    i1 = nrank - ny
     i0 = (bc%n1-1)*bc%n2 + 1
     i1 = nrank - bc%n2
     bc%i0 = i0
     bc%i1 = i1
 !
     IF(ASSOCIATED(bc%amat)) DEALLOCATE(bc%amat)
     IF(ASSOCIATED(bc%g)) DEALLOCATE(bc%g)
     ALLOCATE(bc%amat(i0:i1,ny))
     ALLOCATE(bc%g(ny))
 !
     WRITE(*,'(/a,2i6)') 'IBCMAT: i0, i1 =', i0, i1
 !
 !   Extract and save the last ny columns of matrix
 !
     ALLOCATE(arr(nrank))
     DO k=1,ny
        j = nrank-ny+k
        CALL getcol(mat, j, arr)
        bc%amat(i0:i1,k) = arr(i0:i1)
        IF( ANY(arr(1:i0-1) .NE. 0.0d0) ) THEN
           WRITE(*,'(a,i4)') 'i0 is underestimated for j =', j
        END IF
     END DO
 !
 !   The away operator
 !
     DO k=1,ny
        j = nrank-ny+k
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putrow(mat, j, arr)     
     END DO
 !
     DEALLOCATE(arr)
 !
   END SUBROUTINE ibcmat
 !+++
   SUBROUTINE ibcrhs(rhs, ygrid, bc)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     DOUBLE PRECISION, INTENT(in)    :: ygrid(:)
     TYPE(dirich)                    :: bc
 !
     INTEGER :: nrank, ny, m, i0, i1
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
     ny = bc%n2
     m = bc%mbess
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
 !   Get spline coefs at boundary r=1
 !
     SELECT CASE (bc%meth)
     CASE(1)
        CALL dirich_interp(ygrid, bc, frhs)
     CASE(2)
        CALL dirich_minres(ygrid, bc, frhs)
     END SELECT
 !
 !   Modify RHS
 !
     i0 = bc%i0
     i1 = bc%i1
     rhs(i0:i1) = rhs(i0:i1) - MATMUL(bc%amat, bc%g)
     rhs(i1+1:nrank) = bc%g(1:ny)
   CONTAINS
     DOUBLE PRECISION FUNCTION frhs(x)
       DOUBLE PRECISION, INTENT(in) :: x
       frhs = COS(m*x)
     END FUNCTION frhs
   END SUBROUTINE ibcrhs
 !++++
   SUBROUTINE coefeq(x, y, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x, y
     INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
     c(1) = x        ! 
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.d0/x
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq
 !++++
   SUBROUTINE dirich_interp(ygrid, bc, frhs)
 !
 !   Dirichlet BC by interpolation
 !
     USE bsplines
     DOUBLE PRECISION, INTENT(in) :: ygrid(:)
     TYPE(dirich)                 :: bc
     INTERFACE
        DOUBLE PRECISION FUNCTION frhs(x)
          DOUBLE PRECISION, INTENT(in) :: x
        END FUNCTION frhs
     END INTERFACE
 !
     INTEGER :: nidbas, dim, n2, i
     DOUBLE PRECISION :: shifty
     DOUBLE PRECISION :: gval(SIZE(ygrid))
     DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
     DOUBLE PRECISION :: ygrid_interp(SIZE(ygrid))
     TYPE(spline1d) :: spl_interp
 !    
     nidbas = bc%nidbas2
     n2 = bc%n2
 !
     IF(MODULO(nidbas,2) .EQ. 0 ) THEN
        shifty = 0.5d0*(ygrid(2)-ygrid(1))
        ygrid_interp(:) = ygrid(:) + shifty
     ELSE
        ygrid_interp(:) = ygrid(:)
     END IF
     CALL set_splcoef(nidbas, ygrid_interp, spl_interp, period=.TRUE.)
     CALL get_dim(spl_interp, dim)
     ALLOCATE(coefs(dim))
 !
     DO i=1,SIZE(ygrid)
        gval(i) = frhs(ygrid_interp(i))
     END DO
     CALL get_splcoef(spl_interp, gval, coefs)
 !
 !   Store spline coefs in bc
 !
     bc%g(1:n2) = coefs(1:n2)
 !
     DEALLOCATE(coefs)
     CALL destroy_sp(spl_interp)
   END SUBROUTINE dirich_interp
 !++++
   SUBROUTINE dirich_minres(xgrid, bc, frhs)
 !
 !   Dirichlet BC by minimization of residual
 !
     USE bsplines
     USE matrix
     USE conmat_mod
     DOUBLE PRECISION, INTENT(in) :: xgrid(:)
     TYPE(dirich)                 :: bc
     INTERFACE
        DOUBLE PRECISION FUNCTION frhs(x)
          DOUBLE PRECISION, INTENT(in) :: x
        END FUNCTION frhs
     END INTERFACE
 !
     INTEGER :: nx, nidbas, ngauss, kl, ku
     TYPE(periodic_mat) :: mass_mat
     TYPE(spline1d) :: spl
 !
     nidbas = bc%nidbas2
     ngauss = nidbas+1
     nx = bc%n2
     kl = nidbas
     ku = kl
 !    
     CALL set_spline(nidbas, ngauss, xgrid, spl, period=.TRUE.)   
     CALL init(kl, ku, nx, 1, mass_mat)
     CALL conmat(spl, mass_mat, coefeq_mass)
     CALL conrhs(spl, bc%g, frhs)
     CALL factor(mass_mat)
     CALL bsolve(mass_mat, bc%g)
 !
     CALL destroy(mass_mat)
     CALL destroy_sp(spl)
 !
   CONTAINS
     SUBROUTINE coefeq_mass(x, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x
       INTEGER, INTENT(out) :: idt(:), idw(:)
       DOUBLE PRECISION, INTENT(out) :: c(:)
       c(1) = 1.0d0
       idt(1) = 0
       idw(1) = 0
     END SUBROUTINE coefeq_mass
   END SUBROUTINE dirich_minres
 !++++
   DOUBLE PRECISION FUNCTION err2_norm(spl, jder, mbess, fexact)
 !
 !  Compute error L2 norm unsing Gauss points
 !
     USE bsplines
     TYPE(spline2d) :: spl
     INTEGER, INTENT(in) :: jder(:)
     INTEGER, INTENT(in) :: mbess
     INTERFACE
        DOUBLE PRECISION FUNCTION fexact(m,x,y)
          INTEGER, INTENT(in)          :: m
          DOUBLE PRECISION, INTENT(in) :: x, y
        END FUNCTION fexact
     END INTERFACE
 !
     DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
     INTEGER :: i1, ig1, n1, nidbas1, ndim1, ng1
     INTEGER :: i2, ig2, n2, nidbas2, ndim2, ng2
     DOUBLE PRECISION :: contrib
     DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), sol(:,:)
 !
 !   Gauss points and weights on all intervals
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
     wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
     ng1 = SIZE(xg1,1)
     xg2 => spl%sp2%gausx
     wg2 => spl%sp2%gausw
     ng2 = SIZE(xg2,1)
 !
     err2_norm = 0.0d0
     ALLOCATE(x(ng1), y(ng2))
     ALLOCATE(sol(ng1,ng2))
     DO i1=1,n1                
        x=xg1(:,i1)
        DO i2=1,n2
           y=xg2(:,i2)
           CALL gridval(spl, x, y, sol, jder)
           DO ig1=1,ng1            
              DO ig2=1,ng2      
                 contrib = wg1(ig1,i1)*wg2(ig2,i2)*(sol(ig1,ig2) - &
                      &    fexact(mbess,x(ig1),y(ig2)))**2
                 err2_norm = err2_norm +  x(ig1)*contrib !use same inner-product in weak-form
              END DO
           END DO
        END DO
     END DO
     DEALLOCATE(x)
     DEALLOCATE(y)
     DEALLOCATE(sol)
     err2_norm = SQRT(err2_norm)
   END FUNCTION err2_norm
 END MODULE poisson_mod
diff --git a/examples/dirichlet/run_poisson.sh b/examples/dirichlet/run_poisson.sh
index dafc434..7833785 100644
--- a/examples/dirichlet/run_poisson.sh
+++ b/examples/dirichlet/run_poisson.sh
@@ -1,46 +1,46 @@
 #
 # @file run_poisson.sh
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 #!/bin/sh
 EXEC=./poisson
 
 cat > in0 <<EOF
 &newrun
  nx = xnints, ny = xnints,
  nidbas = 4,4
  ngauss = 5,5
  mbess = 3,
  nlppform = t,
  dirmeth = 2, 
  coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
  coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
 /
 EOF
 
 for x in 8 16 32 64 128 256; do
     sed "s/xnints/$x/g" in0 > in1
     $EXEC < in1 | grep 'Discretization errors '
 done
 rm -f in?
diff --git a/examples/dismat.f90 b/examples/dismat.f90
index a15e8df..c4bf0ed 100644
--- a/examples/dismat.f90
+++ b/examples/dismat.f90
@@ -1,157 +1,157 @@
 !>
 !> @file dismat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline2d), INTENT(in) :: spl
   TYPE(gbmat), INTENT(inout) :: mat
 !
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: n2, nidbas2, ndim2, ng2
   INTEGER :: i, j, ig1, ig2
   INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
   DOUBLE PRECISION:: contrib
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
   INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
   WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
   WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl%sp1, ng1)
   CALL get_gauss(spl%sp2, ng2)
   ALLOCATE(xg1(ng1), wg1(ng1))
   ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
   ALLOCATE(coefs(kterms,ng1,ng2))
 !
   ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
   ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
   ALLOCATE(left1(ng1))
   ALLOCATE(left2(ng2))
   DO i=1,n1
      CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
      left1 = i
      CALL basfun(xg1, spl%sp1, fun1, left1)
      DO j=1,n2
         CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
         left2 = j
         CALL basfun(xg2, spl%sp2, fun2, left2)
 !
         DO ig1=1,ng1
            DO ig2=1,ng2
               CALL coefeq(xg1(ig1), xg2(ig2), &
                    &      idert(:,:,ig1,ig2), &
                    &      iderw(:,:,ig1,ig2), &
                    &      coefs(:,ig1,ig2))
            END DO
         END DO
 !
         DO iw1=0,nidbas1  ! Weight function in dir 1
            igw1 = i+iw1
            DO iw2=0,nidbas2  ! Weight function in dir 2
               igw2 = MODULO(j+iw2-1, n2) + 1
               irow = igw2 + (igw1-1)*n2
               DO it1=0,nidbas1  ! Test function in dir 1
                  igt1 = i+it1
                  DO it2=0,nidbas2  ! Test function in dir 2
                     igt2 = MODULO(j+it2-1, n2) + 1
                     jcol = igt2 + (igt1-1)*n2
 !-------------
                     contrib = 0.0d0
                     DO ig1=1,ng1
                        DO ig2=1,ng2
                           DO iterm=1,kterms
                              contrib = contrib + &
                                   &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                   &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                   &    coefs(iterm,ig1,ig2) *                 &
                                   &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                   &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                   &    wg1(ig1) * wg2(ig2)
                           END DO
                        END DO
                     END DO
                     CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                  END DO
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xg1, wg1, fun1)
   DEALLOCATE(xg2, wg2, fun2)
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(left1,left2)
 !
 CONTAINS
   SUBROUTINE coefeq(x, y, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x, y
     INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
     c(1) = x        ! 
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.d0/x
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
diff --git a/examples/disrhs.f90 b/examples/disrhs.f90
index 24cbb84..cf29fd5 100644
--- a/examples/disrhs.f90
+++ b/examples/disrhs.f90
@@ -1,198 +1,198 @@
 !>
 !> @file disrhs.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: mbess
   TYPE(spline2d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: n2, nidbas2, ndim2, ng2
   INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
   DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
   ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
   ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl%sp1, ng1)
   CALL get_gauss(spl%sp2, ng2)
   WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
   ALLOCATE(xg1(ng1), wg1(ng1))
   ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
   nrank = SIZE(rhs)
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,n1
      CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
      DO ig1=1,ng1
         CALL basfun(xg1(ig1), spl%sp1, fun1, i)
         DO j=1,n2
            CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
            DO ig2=1,ng2
               CALL basfun(xg2(ig2), spl%sp2, fun2, j)
               contrib = wg1(ig1)*wg2(ig2) * &
                    &    rhseq(xg1(ig1),xg2(ig2), mbess)
               DO k1=0,nidbas1
                  i1 = i+k1
                  DO k2=0,nidbas2
                     j2 = MODULO(j+k2-1,n2) + 1
                     ij = j2 + (i1-1)*n2
                     rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                  END DO
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xg1, wg1, fun1)
   DEALLOCATE(xg2, wg2, fun2)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
     DOUBLE PRECISION, INTENT(in) :: x1, x2
     INTEGER, INTENT(in) :: m
     rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 SUBROUTINE disrhs3(mbess, npow, spl, rhs)
 !
 !   Assembly the RHS using 3d spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in)           :: mbess, npow
   TYPE(spline2d1d), TARGET      :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
 !
   TYPE(spline1d), POINTER :: sp1, sp2, sp3
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: n2, nidbas2, ndim2, ng2
   INTEGER :: n3, nidbas3, ndim3, ng3
   INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
   DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   sp1 => spl%sp12%sp1
   sp2 => spl%sp12%sp2
   sp3 => spl%sp3
 !
   CALL get_dim(sp1, ndim1, n1, nidbas1)
   CALL get_dim(sp2, ndim2, n2, nidbas2) 
   CALL get_dim(sp3, ndim3, n3, nidbas3) 
 !
   ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
   ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
   ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(sp1, ng1)
   CALL get_gauss(sp2, ng2)
   CALL get_gauss(sp3, ng3)
   WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3
   ALLOCATE(xg1(ng1), wg1(ng1))
   ALLOCATE(xg2(ng2), wg2(ng2))
   ALLOCATE(xg3(ng3), wg3(ng3))
 !===========================================================================
 !              2.0 Assembly loop
 !
   nrank = SIZE(rhs,1)
   rhs(1:nrank,1:n3) = 0.0d0
 !
   DO i=1,n1
      CALL get_gauss(sp1, ng1, i, xg1, wg1)
      DO ig1=1,ng1
         CALL basfun(xg1(ig1), sp1, fun1, i)
         DO j=1,n2
            CALL get_gauss(sp2, ng2, j, xg2, wg2)
            DO ig2=1,ng2
               CALL basfun(xg2(ig2), sp2, fun2, j)
               DO k=1,n3
                  CALL get_gauss(sp3, ng3, k, xg3, wg3)
                  DO ig3=1,ng3
                     CALL basfun(xg3(ig3), sp3, fun3, k)
                     contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
                          &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
                     DO k1=0,nidbas1
                        i1 = i+k1
                        DO k2=0,nidbas2
                           j2 = MODULO(j+k2-1,n2) + 1
                           ij = j2 + (i1-1)*n2
                           DO k3=0,nidbas3
                              kk = MODULO(k+k3-1,n3) + 1
                              rhs(ij,kk) = rhs(ij, kk) + &
                                   &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
                           END DO
                        END DO
                     END DO
                  END DO
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xg1, wg1, fun1)
   DEALLOCATE(xg2, wg2, fun2)
   DEALLOCATE(xg3, wg3, fun3)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
     DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
     INTEGER, INTENT(in) :: m, n
     rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
   END FUNCTION rhseq
 END SUBROUTINE disrhs3
diff --git a/examples/driv1.f90 b/examples/driv1.f90
index 03a9329..e725b38 100644
--- a/examples/driv1.f90
+++ b/examples/driv1.f90
@@ -1,189 +1,189 @@
 !>
 !> @file driv1.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 ! Basis splines on a 2d grid.
 !
   USE bsplines
   USE futils
   IMPLICIT NONE
   TYPE(spline1d) :: spx, spy
   INTEGER :: nx=10, ny=8, nidbas=2, ngauss=4, npts=1000
   DOUBLE PRECISION :: a, b, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:), fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xp(:), funxp(:,:), yp(:), funyp(:,:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: i, j, left
   CHARACTER(len=256) :: title
   INTEGER :: fid
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, a, b, coefx, coefy
 !===========================================================================
   nidbas = 3
   ngauss = 4
   nx = 10    ! Number of intevals in x
   ny = 8     ! Number of intevals in y
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   READ(*,newrun)
   WRITE(*,newrun)
 !===========================================================================
 !                  1.0 Set up grids
 !
   ALLOCATE( xgrid(0:nx) )
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
 !
 !!$  dy = 2.d0*pi/REAL(ny)
   dy = 1.0d0
   ALLOCATE( ygrid(0:ny) )
   ygrid(0) = a
   ygrid(ny) = b
   CALL meshdist(coefy, ygrid, ny)
 !===========================================================================
 !                  2.0 Set up splines on (x,y)
 !
   CALL set_spline(nidbas, ngauss, xgrid, spx)
   CALL set_spline(nidbas, ngauss, ygrid, spy, period=.TRUE.)
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), &
        &                       ':',UBOUND(spx%knots), spx%knots
   WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of y-splines', LBOUND(spy%knots), &
        &                       ':',UBOUND(spy%knots), spy%knots
   WRITE(*,'(2(a,i5, 2x))') 'NY =', ny, 'DIM =', spy%dim
 !===========================================================================
 !                  3.0 Graph the splines on (x,y)
 !
   ALLOCATE( fun(nidbas+1,1) )         ! Only 0-th derivative
 !!$  ALLOCATE( fun(nidbas+1,0:1) )         ! Only 0-th derivative
   ALLOCATE( xp(npts), funxp(npts,0:spx%dim-1) )
   ALLOCATE( yp(npts-1), funyp(npts-1,0:spy%dim-1) )
 !
 !   Splines in X (non-peridic)
 !
   WRITE(*,'(a)') 'Splines in x'
   dx = (xgrid(nx)-xgrid(0)) / REAL(NPTS-1)
   DO i=1,npts
      xp(i) = xgrid(0) + (i-1)*dx
      CALL locintv(spx, xp(i), left)
      CALL basfun(xp(i), spx, fun, left+1)
      funxp(i,left:left+nidbas) = fun(:,1)
   END DO
 !
 !   Splines in Y (periodic)
 !
   WRITE(*,'(a)') 'Splines in y'
   dy = (ygrid(ny)-ygrid(0)) / REAL(NPTS-1)
   DO i=1,npts-1
      yp(i) = ygrid(0) + (i-1)*dy
      CALL locintv(spy, yp(i), left)
      CALL basfun(yp(i), spy, fun, left+1)
      funyp(i,left:left+nidbas) = fun(:,1)
   END DO
 !
 !  Create hdf5 file
 !
   CALL creatf('driv1.h5', fid, real_prec='d')
 !
   WRITE(title,'(a,i3,5x,a,i6)') 'Splines of degree =', nidbas, 'NX =', nx
   CALL putarr(fid, 'X', xp)
   CALL putarr(fid, 'KNOTSX', spx%knots)
   CALL putarr(fid, 'splinesx', funxp, TRIM(title))
   CALL putarr(fid, 'KNOTSY', spy%knots)
 !
   WRITE(title,'(a,i3,5x,a,i6)') 'Periodic splines of degree =', nidbas, 'NY =', ny
   CALL putarr(fid, 'Y', yp)
   CALL putarr(fid, 'splinesy', funyp, TRIM(title))
   CALL closef(fid)
 !===========================================================================
 !                  9.0 Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgrid, ygrid)
   DEALLOCATE(xp, funxp)
   CALL destroy_sp(spx)
   CALL destroy_sp(spy)
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/driv2.f90 b/examples/driv2.f90
index b67b578..720f624 100644
--- a/examples/driv2.f90
+++ b/examples/driv2.f90
@@ -1,180 +1,180 @@
 !>
 !> @file driv2.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 ! Integration of splines
 !
   USE bsplines
 !
   IMPLICIT NONE
   TYPE(spline1d) :: spx
   INTEGER :: nx, nidbas, ngauss
   DOUBLE PRECISION :: a, b, coefx(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fun(:,:), finteg(:)
   DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:)
   DOUBLE PRECISION :: support, res, err
   INTEGER :: i, ig, j, jj, left
   INTEGER :: dim, ng
   LOGICAL :: periodic
   NAMELIST /newrun/ periodic, nx, nidbas, ngauss, a, b, coefx
 !===========================================================================
 !                  1.0 Set up grids
 !
 !   Read in data specific to run
 !
   periodic = .FALSE.
   nidbas = 3
   ngauss = 4
   nx = 10    ! Number of intevals in x
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid/knots
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !===========================================================================
 !                  2.0 Set up splines
 !
   CALL set_spline(nidbas, ngauss, xgrid, spx, periodic)
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), &
        &                       ':',UBOUND(spx%knots), spx%knots
   WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim
 !===========================================================================
 !                  3.0 Integrate all splines
 !
   CALL get_dim(spx, dim) 
   ALLOCATE( finteg(0:dim-1), xg(ngauss), wg(ngauss), fun(0:nidbas,1) )
   finteg = 0.0
   fun = 0.0
   DO i=1,nx             ! Loop thru the intervals
      CALL get_gauss(spx, ng, i, xg, wg)
      DO ig=1,ng         ! Loop thru Gauss points
         CALL basfun(xg(ig), spx, fun, i)
         left = i-1
         DO j=0,nidbas   ! Loop thru the splines in this interval
            jj = left+j
            IF( periodic ) jj = MODULO(left+j, nx)
            finteg(jj) = finteg(jj) + wg(ig)*fun(j,1)
         END DO
      END DO
   END DO
 !!$  IF( periodic ) THEN
 !!$     DO i=nx,dim-1
 !!$        finteg(i) = finteg(i-nx)
 !!$     END DO
 !!$  END IF
 !
   WRITE(*,'(a/(10f10.5))') 'Integrals of splines', finteg
   PRINT*, 'Sum of finteg', SUM(finteg)
 !!$  IF( periodic ) THEN
 !!$     PRINT*, 'Sum of finteg', SUM(finteg(0:nx-1))
 !!$  ELSE
 !!$     PRINT*, 'Sum of finteg', SUM(finteg)
 !!$  END IF
 !
   WRITE(*,'(a/(10f10.5))') 'Integrals of splines from module', spx%intspl
   PRINT*, 'Sum of finteg', SUM(spx%intspl)
   WRITE(*,'(a5,4a12)') '#', 'I', 'S', '(p+1)I/S', '(p+1)I/S-1'
   DO i=0,spx%dim-1
      support = spx%knots(i+1)-spx%knots(i-nidbas)
      res =  spx%intspl(i)/support*(nidbas+1)
      err = res - 1.0d0
      WRITE(*,'(i5,4(1pe12.4))') i, spx%intspl(i), support, res, err
   END DO
 !===========================================================================
 !                  9.0 Epilogue
 !
   DEALLOCATE( finteg, xg, wg, fun )
   DEALLOCATE(xgrid)
   CALL destroy_sp(spx)
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/driv3.f90 b/examples/driv3.f90
index 8df6e99..cce04fd 100644
--- a/examples/driv3.f90
+++ b/examples/driv3.f90
@@ -1,159 +1,159 @@
 !>
 !> @file driv3.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 ! Computation of croos mass matrix between two splines sp1 & sp2 
 ! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or
 ! set_splcoef) and of any order.
 
 !
   USE bsplines
 !
   IMPLICIT NONE
   TYPE(spline1d) :: sp1, sp2
   INTEGER :: nx, nidbas1, nidbas2, ngauss
   INTEGER :: i, j
   DOUBLE PRECISION :: a, b, coefx(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:)
   DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat
   LOGICAL :: periodic1, periodic2
   NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2
 !===========================================================================
 !                  1.0 Set up grids
 !
 !   Read in data specific to run
 !
   nx = 8    ! Number of intevals in x
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   periodic1 = .FALSE.
   periodic2 = .FALSE.
   nidbas1 = 3
   nidbas2 = 2
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid/knots
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0 ) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
 !===========================================================================
 !                  2.0 Set up splines
 !
   ngauss = 1 ! Gauss points initialized with set_spline are in fact not used
              ! for computing cross mass matrix
   ! First spline set up as for solving a PDE with FEMs
   CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1)
 
   ! Second spline set up as for interpolation
   CALL set_splcoef(nidbas2, xgrid, sp2, periodic2)
 
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), &
        &                       ':',UBOUND(sp1%knots), sp1%knots
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), &
        &                       ':',UBOUND(sp2%knots), sp2%knots
   WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim
 !===========================================================================
 !                  3.0 Compute cross mass matrix
 !
   CALL CompMassMatrix(sp1, sp2, a, b, MassMat)
 
   WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:"
   DO i = 1, SIZE(MassMat, 1)
      WRITE(*, "(15f13.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15))
   END DO
 
 !===========================================================================
 !                  9.0 Epilogue
 !
   DEALLOCATE(MassMat)
   DEALLOCATE(xgrid)
   CALL destroy_sp(sp1)
   CALL destroy_sp(sp2)
 
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/driv4.f90 b/examples/driv4.f90
index 63711b7..3e7cad4 100644
--- a/examples/driv4.f90
+++ b/examples/driv4.f90
@@ -1,225 +1,225 @@
 !>
 !> @file driv4.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 ! Computation of croos mass matrix between two splines sp1 & sp2 
 ! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or
 ! set_splcoef) and of any order.
 
 !
   USE bsplines
   USE matrix
 !
   IMPLICIT NONE
   TYPE(gbmat) :: matm
   INTEGER :: mrows, ncols, kl, ku
   DOUBLE PRECISION, ALLOCATABLE :: avec(:,:), bvec(:,:), matfull(:,:)
 !
   TYPE(zgbmat) :: zmatm
   DOUBLE COMPLEX, ALLOCATABLE :: zavec(:,:), zbvec(:,:)
   DOUBLE PRECISION :: dznrm2
 !
   TYPE(spline1d) :: sp1, sp2
   INTEGER :: nx, nidbas1, nidbas2, ngauss
   INTEGER :: i, j
   DOUBLE PRECISION :: a, b, coefx(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:)
   DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat
   LOGICAL :: periodic1, periodic2
   NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2
 !===========================================================================
 !                  1.0 Set up grids
 !
 !   Read in data specific to run
 !
   nx = 8    ! Number of intevals in x
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   periodic1 = .FALSE.
   periodic2 = .FALSE.
   nidbas1 = 3
   nidbas2 = 2
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid/knots
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0 ) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
 !===========================================================================
 !                  2.0 Set up splines
 !
   ngauss = 1 ! Gauss points initialized with set_spline are in fact not used
              ! for computing cross mass matrix
   ! First spline set up as for solving a PDE with FEMs
   CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1)
 
   ! Second spline set up as for interpolation
 !!$  CALL set_splcoef(nidbas2, xgrid, sp2, periodic2)
   CALL set_spline(nidbas2, ngauss, xgrid, sp2, periodic2)
 
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), &
        &                       ':',UBOUND(sp1%knots), sp1%knots
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), &
        &                       ':',UBOUND(sp2%knots), sp2%knots
   WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim
 !===========================================================================
 !                  3.0 Compute cross mass matrix
 !
   CALL CompMassMatrix(sp1, sp2, a, b, MassMat)
 
   WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:"
   DO i = 1, SIZE(MassMat, 1)
      WRITE(*, "(15f10.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15))
   END DO
 !
 !  Should equal to 1 for splines i "not close to  the boundaries":
 !                  p1 .LT. i .LE. N
 !
   WRITE(*,'(/a/(15f8.5))') 'Sum of cols * NX', SUM(MassMat,dim=2)*REAL(nx,8)
 !===========================================================================
 !                  3.0  Use DGB matrice
 !
 !!$  mrows = nx+nidbas1
 !!$  ncols = nx+nidbas2
   CALL get_dim(sp1, mrows)
   CALL get_dim(sp2, ncols)
   kl = nidbas1
   ku = nidbas2
   CALL init(kl, ku, ncols, 1, matm, mrows=mrows)
   WRITE(*,'(/a, 2i3)') 'Band matrix:, kl, ku =', kl, ku
 !
   CALL CompMassMatrix(sp1, sp2, a, b, matm)
 !
   DO i=1,SIZE(matm%val,1)
      WRITE(*,'(15f10.5)') matm%val(i,:)
   END DO
 !
   WRITE(*,'(/a)') 'Full matrix'  
   ALLOCATE(matfull(mrows,ncols))
   matfull = 0.0d0
   DO i=1,mrows
      CALL getrow(matm, i, matfull(i,:))
      WRITE(*,'(15f10.5)') matfull(i,:)
   END DO
   WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(matfull-MassMat))
 !
 !   Check VMX
   ALLOCATE(avec(ncols,2))
   ALLOCATE(bvec(mrows,2))
   avec = 1.0d0
   bvec = vmx(matm,avec)*REAL(nx,8)
   WRITE(*,'(a)') 'M*a, with a=1'
   DO j=1,2
      WRITE(*,'(15f8.5)') bvec(:,j)
   END DO
 !===========================================================================
 !                  4.0 Test complex version
 !
   CALL init(kl, ku, ncols, 1, zmatm, mrows=mrows)
   CALL CompMassMatrix(sp1, sp2, a, b, zmatm)
   ALLOCATE(zavec(ncols,2))
   ALLOCATE(zbvec(mrows,2))
   zavec = (1.0d0,0.0d0)
   zbvec = vmx(zmatm,zavec)*REAL(nx,8)
   zbvec = zbvec-bvec
   WRITE(*,'(/a)') 'Check complex version'
   WRITE(*,'(a,2(1pe12.4))') 'Norm of errors =', &
        &              (dznrm2(mrows, zbvec(1,j), 1), j=1,2)
   zmatm%val = zmatm%val-matm%val
   WRITE(*,'(a,1pe12.4)') 'Diff of matrix elements =', MAXVAL(ABS(zmatm%val))
 !===========================================================================
 !                  9.0 Epilogue
 !
   DEALLOCATE(MassMat)
   DEALLOCATE(xgrid)
   CALL destroy_sp(sp1)
   CALL destroy_sp(sp2)
   call destroy(matm)
 
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/extra.c b/examples/extra.c
index 572cecd..b406698 100644
--- a/examples/extra.c
+++ b/examples/extra.c
@@ -1,49 +1,49 @@
 /**
  * @file extra.c
  *
  * @brief
  *
  * @copyright
  * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
  * SPC (Swiss Plasma Center)
  *
- * spclibs is free software: you can redistribute it and/or modify it under
+ * SPClibs is free software: you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
  * Software Foundation, either version 3 of the License, or (at your option)
  * any later version.
  *
- * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  *
  * @authors
  * (in alphabetical order)
  * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
  */
 /**********************************************/
 #include <string.h>
 
 void quit_();
 void dump_(char *filename, int *l);
 
 void Dump(filename)
 char *filename;
 {
   /* The user's dump routine should go here.  */
   int l = strlen(filename);
   dump_(filename, &l);
 
 }  /* End DUMP */
 
 /**********************************************/
 
 void Quit()
 {
   /* The user's quit routine should go here.  */
 
   quit_();
 
 }  /* End QUIT */
diff --git a/examples/fit1d.f90 b/examples/fit1d.f90
index db3b786..88ae5e4 100644
--- a/examples/fit1d.f90
+++ b/examples/fit1d.f90
@@ -1,251 +1,251 @@
 !>
 !> @file fit1d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit grid value function to a spline of any order
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, nidbas
   DOUBLE PRECISION :: a, b, coefx(5)
 !!$  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:), coefs(:)
   DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:,:), coefs(:,:)
   INTEGER :: i, dim, left
   TYPE(spline1d) :: spl
   DOUBLE PRECISION :: dx
   INTEGER :: npts
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, POINTER :: splines(:,:) => null()
 !
   CHARACTER(len=128) :: file='fit1d.h5'
   INTEGER :: fid
 !
   NAMELIST /newrun/ nx, nidbas, a, b, coefx
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 10    ! Number of intevals in x
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid and function values
 !
   ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
   fgrid(:,1) = func(xgrid)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
   WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'FIT1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Set up the spline interpolation
 !!$  CALL splcoef_setup(nidbas, xgrid, spl)
   CALL set_splcoef(nidbas, xgrid, spl)
   PRINT*, 'nlequid =', spl%nlequid
 !
 !   Compute spline values and derivatives at Boundaries
   ALLOCATE(fun(nidbas+1,0:nidbas))
   WRITE(*,'(/a)') 'spline at the left boundary'
   CALL locintv(spl, a, left)
   CALL basfun(a, spl, fun, left+1)
   DO i=0,nidbas
      WRITE(*,'(8(1pe12.4))') fun(:,i)
   END DO
 !
   WRITE(*,'(/a)') 'spline at the right boundary'
   CALL locintv(spl, b, left)
   CALL basfun(b, spl, fun, left+1)
   DO i=0,nidbas
      WRITE(*,'(8(1pe12.4))') fun(:,i)
   END DO
   DEALLOCATE(fun)
 !
   CALL get_dim(spl, dim)
   ALLOCATE(coefs(dim,1))
   WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
        &                       ':',UBOUND(spl%knots), spl%knots
 !
 !   From given grid values fgrid, compute the spline coefs
   CALL get_splcoef(spl, fgrid, coefs)
   WRITE(*,'(a/(10f8.3))') 'coefs', coefs
 !
 !   Plot all splines
   npts = 100
   ALLOCATE(xpt(npts))
   dx = (b-a)/REAL(npts-1)
   DO i=1,npts
      xpt(i) = a + (i-1)*dx
   END DO
   CALL allsplines(spl, xpt, splines)
   CALL putarr(fid, '/X', xpt)
   CALL putarr(fid,'/SPLINES', splines)
 !
 !   Check interpolation
   ALLOCATE(fcalc(npts), fexact(npts), err(npts))
   fexact = func(xpt)
 !
 !  Function values
   CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
   err = fexact - fcalc
   CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
   CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
   CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
   WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
 !
 !  Derivatives values
   CALL gridval(spl, xpt, fcalc, 1)
   fexact = func1(xpt)
   err = fexact - fcalc
   CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
   CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
   CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
   WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
 !
 
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xpt, splines, fexact, fcalc, err)
   DEALLOCATE(xgrid, fgrid)
   CALL destroy_sp(spl)
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION func(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func(SIZE(x))
 !!$    INTEGER :: n
 !!$    n = SIZE(x)
 !!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
 !!$    func(1:n/2) = 1.0d0
 !!$    func(n/2+1:n) = 0.5d0
     func = EXP(-8.*x*x)
   END FUNCTION func
 !
   FUNCTION func1(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func1(SIZE(x))
 !!$    INTEGER :: n
 !!$    n = SIZE(x)
 !!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
 !!$    func(1:n/2) = 1.0d0
 !!$    func(n/2+1:n) = 0.5d0
     func1 = -16.d0*x*EXP(-8.*x*x)
   END FUNCTION func1
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/fit1d_cmpl.f90 b/examples/fit1d_cmpl.f90
index ba3c7db..acae6c0 100644
--- a/examples/fit1d_cmpl.f90
+++ b/examples/fit1d_cmpl.f90
@@ -1,106 +1,106 @@
 !>
 !> @file fit1d_cmpl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Fit a 1d complex function
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER,PARAMETER :: NX=10, NIDBAS=3, NPTS=40
   DOUBLE PRECISION :: pi, dx, xgrid(0:NX), xpt(NPTS), err
   DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
   DOUBLE COMPLEX :: fgrid(0:NX), fexact(NPTS), fcalc(NPTS)
   INTEGER :: dim, i
   TYPE(spline1d) :: spl
 !================================================================================
 !
 !   Define grid and function values on grid
 !
   pi = 4.0d0*ATAN(1.0d0)
   xgrid(0) = 0.0d0
   dx = 2.0d0*pi/NX
   DO i=1,NX
      xgrid(i) = xgrid(0) + i*dx
   END DO
 !
   fgrid = func(xgrid)
 !
   WRITE(*,'(2a10)') 'x', 'f'
   DO i=0,NX
      WRITE(*,'(3f10.4)') xgrid(i), fgrid(i)
   END DO
 !
 !   Set up spline
 !
   CALL set_splcoef(NIDBAS, xgrid, spl, period=.TRUE.)
   CALL get_dim(spl, dim)
   ALLOCATE(coefs(dim))
 !
 !   Get Spline coefficients
 !
   CALL get_splcoef(spl, fgrid, coefs)
   WRITE(*,'(a)') 'Interpolation coefs'
   DO i=1,dim
      WRITE(*,'(2(1pe12.3))')  coefs(i)
   END DO
 !
 !   Check interpolation
 !
   CALL RANDOM_NUMBER(xpt)
   xpt = (2.0d0*pi) * xpt
   fexact = func(xpt)
 !
   CALL gridval(spl, xpt, fcalc, 0, coefs)
 !
   WRITE(*,'(a10,2a20)') 'x', 'fexact', 'fcacl'
   DO i=1,NPTS
      WRITE(*,'(5f10.4)') xpt(i), fexact(i), fcalc(i)
   END DO
   err = norm2(fcalc-fexact)
   WRITE(*,'(a,1pe12.3)') 'error', err
 !
 !   Clean up
 !
   DEALLOCATE(coefs)
   CALL destroy_sp(spl)  
 !================================================================================
 CONTAINS
   FUNCTION func(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE COMPLEX :: func(size(x))
     func = EXP( CMPLX(0.0d0, x))
   END FUNCTION func
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of vector x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE COMPLEX, INTENT(in) :: x(:)
 !
     norm2 = SQRT(DOT_PRODUCT(x,x))
   END FUNCTION norm2
 END PROGRAM main
diff --git a/examples/fit1dbc.f90 b/examples/fit1dbc.f90
index 503a469..bb1af10 100644
--- a/examples/fit1dbc.f90
+++ b/examples/fit1dbc.f90
@@ -1,254 +1,254 @@
 !>
 !> @file fit1dbc.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit grid value function to a spline of any order
 !  BC using derivatives at both ends.
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, nidbas
   DOUBLE PRECISION :: a, b, coefx(5)
 !!$  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:), coefs(:)
   DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:,:), coefs(:,:)
   INTEGER :: i, dim
   TYPE(spline1d) :: spl
   DOUBLE PRECISION :: dx
   INTEGER :: npts
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, POINTER :: splines(:,:) => null()
   INTEGER :: ibc(2,10)
 !!$  DOUBLE PRECISION :: fbc(2,10)
   DOUBLE PRECISION :: fbc(2,10,1)
 !
   CHARACTER(len=128) :: file='fit1d.h5'
   INTEGER :: fid
 !
   NAMELIST /newrun/ nx, nidbas, a, b, coefx, ibc, fbc
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 10    ! Number of intevals in x
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   ibc(1,1:10) = (/2,3,4,5,6,7,8,9,10,11/)
   ibc(2,1:10) = ibc(1,1:10)
   fbc = 0.0
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid and function values
 !
   ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
   fgrid(:,1) = func(xgrid)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
   WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'FIT1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Set up the spline interpolation
   CALL set_splcoef(nidbas, xgrid, spl, ibc=ibc)
 !
 !   Compute spline values and derivatives at Boundaries
   ALLOCATE(fun(nidbas+1,0:nidbas))
   WRITE(*,'(/a)') 'spline at the left boundary'
   CALL basfun(a, spl, fun, 1)
   DO i=0,nidbas
      WRITE(*,'(8(1pe12.4))') fun(:,i)
   END DO
 !
   WRITE(*,'(/a)') 'spline at the right boundary'
   CALL basfun(b, spl, fun, nx)
   DO i=0,nidbas
      WRITE(*,'(8(1pe12.4))') fun(:,i)
   END DO
   DEALLOCATE(fun)
 !
   CALL get_dim(spl, dim)
   ALLOCATE(coefs(dim,1))
   WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
        &                       ':',UBOUND(spl%knots), spl%knots
 !
 !   From given grid values fgrid, compute the spline coefs
   CALL get_splcoef(spl, fgrid, coefs, fbc)
   WRITE(*,'(a/(10f8.3))') 'coefs', coefs
 !
 !   Plot all splines
   npts = 100
   ALLOCATE(xpt(npts))
   dx = (b-a)/REAL(npts-1)
   DO i=1,npts
      xpt(i) = a + (i-1)*dx
   END DO
   CALL allsplines(spl, xpt, splines)
   CALL putarr(fid, '/X', xpt)
   CALL putarr(fid,'/SPLINES', splines)
 !
 !   Check interpolation
   ALLOCATE(fcalc(npts), fexact(npts), err(npts))
   fexact = func(xpt)
 !
 !  Function values
   CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
   err = fexact - fcalc
   CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
   CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
   CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
   WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
 !
 !  Derivatives values
   CALL gridval(spl, xpt, fcalc, 1)
   fexact = func1(xpt)
   err = fexact - fcalc
   CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
   CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
   CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
   WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
 !
 
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xpt, splines, fexact, fcalc, err)
   DEALLOCATE(xgrid, fgrid)
   CALL destroy_sp(spl)
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION func(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func(SIZE(x))
 !!$    INTEGER :: n
 !!$    n = SIZE(x)
 !!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
 !!$    func(1:n/2) = 1.0d0
 !!$    func(n/2+1:n) = 0.5d0
     func = EXP(-8.*x*x)
   END FUNCTION func
 !
   FUNCTION func1(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func1(SIZE(x))
 !!$    INTEGER :: n
 !!$    n = SIZE(x)
 !!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
 !!$    func(1:n/2) = 1.0d0
 !!$    func(n/2+1:n) = 0.5d0
     func1 = -16.d0*x*EXP(-8.*x*x)
   END FUNCTION func1
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/fit1dp.f90 b/examples/fit1dp.f90
index 97ca6be..29082d0 100644
--- a/examples/fit1dp.f90
+++ b/examples/fit1dp.f90
@@ -1,227 +1,227 @@
 !>
 !> @file fit1dp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit grid value function to a spline of any order
 !  Periodic case.
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, nidbas
   DOUBLE PRECISION :: a, b, coefx(5)
   DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:,:), coefs(:,:)
   INTEGER :: i, dim
   TYPE(spline1d) :: spl
   DOUBLE PRECISION :: dx, x0, x1
   INTEGER :: npts
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
   DOUBLE PRECISION, POINTER :: splines(:,:) => null()
 !
   CHARACTER(len=128) :: file='fit1d.h5'
   INTEGER :: fid
 !
   NAMELIST /newrun/ nx, nidbas, a, b, coefx
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 10    ! Number of intevals in x
   a = 0.0d0  ! Left boundary of interval
   b = 1.0d0  ! Right boundary of interval
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid and function values
 !
   ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefx, xgrid, nx)
   fgrid(:,1) = func(xgrid)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
   WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'FIT1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Set up the spline interpolation
   CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
   CALL get_dim(spl, dim)
   WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
   WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
        &                       ':',UBOUND(spl%knots), spl%knots
 !
   ALLOCATE(coefs(dim,1))
 !
 !   From given grid values fgrid, compute the spline coefs
   CALL get_splcoef(spl, fgrid, coefs)
   WRITE(*,'(a/(10f8.3))') 'coefs', coefs
 !
 !   Plot all splines
 !
   npts = 100
   ALLOCATE(xpt(npts))
 !!$  x0 = a
 !!$  x1 = b
   x0 = spl%knots(0)
   x1 = spl%knots(nx)
   dx = (x1 -x0)/REAL(npts)  ! Last point b not inluded
   DO i=1,npts
      xpt(i) = x0 + (i-1)*dx
   END DO
   CALL allsplines(spl, xpt, splines)
   CALL putarr(fid, '/X', xpt)
   CALL putarr(fid,'/SPLINES', splines)
 !
 !   Check interpolation
 !
   ALLOCATE(fcalc(npts), fexact(npts), err(npts))
   fexact = func(xpt)
 !
   CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
   err = fexact - fcalc
   CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
   CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
   CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
   WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
 !
 !  Derivatives values
   CALL gridval(spl, xpt, fcalc, 1)
   fexact = func1(xpt)
   err = fexact - fcalc
   CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
   CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
   CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
   WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xpt, splines, fexact, fcalc, err)
   DEALLOCATE(xgrid, fgrid)
   CALL destroy_sp(spl)
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION func(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func(SIZE(x))
     DOUBLE PRECISION :: pi
     pi = 4.0*ATAN(1.0d0)
     func = SIN(2.d0*pi*x) + 2.0d0*COS(8.d0*pi*x)
   END FUNCTION func
   FUNCTION func1(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func1(SIZE(x))
     DOUBLE PRECISION :: pi
     pi = 4.0*ATAN(1.0d0)
     func1 = 2.d0*pi*COS(2.d0*pi*x) - 16.0d0*pi*SIN(8.d0*pi*x)
   END FUNCTION func1
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/fit2d.f90 b/examples/fit2d.f90
index ab598de..563cd26 100644
--- a/examples/fit2d.f90
+++ b/examples/fit2d.f90
@@ -1,159 +1,159 @@
 !>
 !> @file fit2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit 2d grid value function to a 2d spline of any order
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   CHARACTER(len=128) :: file='fit2d.h5'
   INTEGER :: fid
   INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
   INTEGER, PARAMETER :: nptx=100, npty=100
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
   DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
   DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
   TYPE(spline2d) :: splxy
   DOUBLE PRECISION :: mem
   INTEGER :: i, j
 !
   NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intevals in x
   ny = 8              ! Number of intevals in y
   nidbas = (/3,3/)    ! Degree of splines
   mbes = 2
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
   fgrid = func(xgrid,ygrid)
 !
   WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
   IF( nx.LE.10 .AND. ny.LE.10 ) THEN
      WRITE(*,'(a)') 'FGRID'
      DO j=0,ny
         WRITE(*,'(12f8.3)') fgrid(:,j)
      END DO
      WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
   END IF
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'MBESS', mbes)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Setup the spline interpolation
   CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
   WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
 !
 !   Compute spline interpolation coefficients
   CALL get_dim(splxy, dims)
   ALLOCATE(bcoefs(dims(1),dims(2)))
   WRITE(*,'(a,2i4)') 'Dims of spline', dims
 !
   CALL get_splcoef(splxy, fgrid, bcoefs)
 !===========================================================================
 !              2.0 Check interpolation
 !
   dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
   dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
   DO i=1,nptx
      xpt(i) = xgrid(0) + (i-1)*dx
   END DO
   DO i=1,npty
      ypt(i) = ygrid(0) + (i-1)*dy
   END DO
   fexact = func(xpt,ypt)
   CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
   errs = fcalc-fexact
   WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
 !
   CALL putarr(fid, '/xpt', xpt, 'r')
   CALL putarr(fid, '/ypt', ypt, '\theta')
   CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
   CALL putarr(fid, '/fexact', fexact, 'Exact')
   CALL putarr(fid, '/errs', errs, 'Errors')
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoefs)
   DEALLOCATE(xgrid, ygrid, fgrid)
   CALL destroy_sp(splxy)
   CALL closef(fid)
 !
 CONTAINS
   FUNCTION func(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * SIN(mbes*y(j))
        func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
     END DO
   END FUNCTION func
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
diff --git a/examples/fit2d1d.f90 b/examples/fit2d1d.f90
index e0ffec3..23a919f 100644
--- a/examples/fit2d1d.f90
+++ b/examples/fit2d1d.f90
@@ -1,154 +1,154 @@
 !>
 !> @file fit2d1d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit 2d grid value function to a 2d spline of any order
 !  Interpolating on an grid (x_i,y_j) or a set of particle
 !  positions (x_p,y_p).
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
   INTEGER, PARAMETER :: nptx=100, npty=100
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
   DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
   DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
   DOUBLE PRECISION, DIMENSION(nptx*npty) :: xp, yp, fcalcp, fexactp, errsp
   TYPE(spline2d) :: splxy
   DOUBLE PRECISION :: mem
   INTEGER :: i, j
 !
   NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intevals in x
   ny = 8              ! Number of intevals in y
   nidbas = (/3,3/)    ! Degree of splines
   mbes = 2
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
   fgrid = func(xgrid,ygrid)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Setup the spline interpolation
   CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
 !
 !   Compute spline interpolation coefficients
   CALL get_dim(splxy, dims)
   ALLOCATE(bcoefs(dims(1),dims(2)))
   WRITE(*,'(a,2i4)') 'Dims of spline', dims
 !
   CALL get_splcoef(splxy, fgrid, bcoefs)
 !===========================================================================
 !              2.0 Check interpolation
 !
   dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
   dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
   DO i=1,nptx
      xpt(i) = xgrid(0) + (i-1)*dx
   END DO
   DO i=1,npty
      ypt(i) = ygrid(0) + (i-1)*dy
   END DO
   fexact = func(xpt,ypt)
   CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
   errs = fcalc-fexact
   WRITE(*,*) 'Using the GRIDVAL2D2D'
   WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
 !
 ! The 2d1d version
   WRITE(*,*) 'Using the GRIDVAL2D1D'
   CALL RANDOM_NUMBER(xp)
   CALL RANDOM_NUMBER(yp)
   yp=2.0*pi*yp
   fexactp = func1(xp,yp)
 !!$  CALL gridval(splxy, xp, yp, fcalcp, (/0,0/), bcoefs)
   CALL gridval(splxy, xp, yp, fcalcp, (/0,0/))
   errsp = fcalcp-fexactp
   WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errsp), MAXVAL(errsp)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoefs)
   DEALLOCATE(xgrid, ygrid, fgrid)
   CALL destroy_sp(splxy)
 !
 CONTAINS
   FUNCTION func(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * SIN(mbes*y(j))
        func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
     END DO
   END FUNCTION func
   FUNCTION func1(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func1(SIZE(x))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(x)
        zy = -mbes * SIN(mbes*y(j))
        func1(j) =(1-x(j)**2) * x(j)**mbes * zy
     END DO
   END FUNCTION func1
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
diff --git a/examples/fit2d_cmpl.f90 b/examples/fit2d_cmpl.f90
index 6e6a4ee..4436ed0 100644
--- a/examples/fit2d_cmpl.f90
+++ b/examples/fit2d_cmpl.f90
@@ -1,132 +1,132 @@
 !>
 !> @file fit2d_cmpl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit 2d grid value function to a 2d spline of any order
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
   INTEGER, PARAMETER :: npt=10000
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:)
   DOUBLE COMPLEX, ALLOCATABLE :: fgrid(:,:),  fgrid_calc(:,:), bcoefs(:,:)
   DOUBLE PRECISION :: dx, dy, xpt(npt), ypt(npt), errs(npt)
   DOUBLE COMPLEX, DIMENSION(npt) :: fcalc, fexact
   TYPE(spline2d) :: splxy
   INTEGER :: i, j
 !
   NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intevals in x
   ny = 8              ! Number of intevals in y
   nidbas = (/3,3/)    ! Degree of splines
   mbes = 2
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   ALLOCATE(fgrid(0:nx,0:ny), fgrid_calc(0:nx,0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
   fgrid = func2(xgrid,ygrid)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Setup the spline interpolation
   CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
 !
 !   Compute spline interpolation coefficients
   CALL get_dim(splxy, dims)
   ALLOCATE(bcoefs(dims(1),dims(2)))
   WRITE(*,'(a,2i4)') 'Dims of spline', dims
 !
   CALL get_splcoef(splxy, fgrid, bcoefs)
 !===========================================================================
 !              2.0 Check interpolation
 !
   CALL RANDOM_NUMBER(xpt)
   CALL RANDOM_NUMBER(ypt)
   ypt(:) = ypt(:)*2.0*pi
   fexact = func1(xpt,ypt)
 !
   CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
   errs = ABS(fcalc-fexact)
   WRITE(*,'(a,2(1pe12.3))') 'Max errors (on random points)', MAXVAL(errs)
 !
   CALL gridval(splxy, xgrid, ygrid, fgrid_calc, (/0,0/))
   WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', &
        &     MAXVAL(ABS(fgrid_calc-fgrid))
 !
   fgrid_calc = 0.0
   DO j=0,ny
      ypt(1:nx+1) = ygrid(j)
      CALL gridval(splxy, xgrid, ypt(1:nx+1), fgrid_calc(:,j), (/0,0/))
   END DO
   WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', &
        &     MAXVAL(ABS(fgrid_calc-fgrid))
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoefs)
   DEALLOCATE(xgrid, ygrid, fgrid, fgrid_calc)
   CALL destroy_sp(splxy)
 !
 CONTAINS
   FUNCTION func2(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE COMPLEX :: func2(SIZE(x), SIZE(y))
     DOUBLE COMPLEX :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j)))
        func2(:,j) =(1-x(:)**2) * x(:)**mbes * zy
     END DO
   END FUNCTION func2
   FUNCTION func1(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE COMPLEX :: func1(SIZE(x))
     DOUBLE COMPLEX :: zy
     INTEGER :: j
     DO j=1,SIZE(x)
        zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j)))
        func1(j) =(1-x(j)**2) * x(j)**mbes * zy
     END DO
   END FUNCTION func1
 END PROGRAM main
diff --git a/examples/fit2dbc.f90 b/examples/fit2dbc.f90
index 75862c0..cae0d98 100644
--- a/examples/fit2dbc.f90
+++ b/examples/fit2dbc.f90
@@ -1,183 +1,183 @@
 !>
 !> @file fit2dbc.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit 2d grid value function to a 2d spline of any order
 !  BC using derivatives at both ends, in the non-periodic direction.
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   CHARACTER(len=128) :: file='fit2d.h5'
   INTEGER :: fid
   INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
   INTEGER, PARAMETER :: nptx=100, npty=100
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
   DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
   DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
   TYPE(spline2d) :: splxy
   DOUBLE PRECISION :: mem
   INTEGER :: i, j, ii
   INTEGER :: ibc1(2,10), ibc2(2,10)
   DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
 !
   NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
 
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intevals in x
   ny = 8              ! Number of intevals in y
   nidbas = (/3,3/)    ! Degree of splines
   mbes = 2
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 1.0d0
   CALL meshdist(coefy, ygrid, ny)
   fgrid = func(xgrid,ygrid)
 !
   WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
   IF( nx.LE.10 .AND. ny.LE.10 ) THEN
      WRITE(*,'(a)') 'FGRID'
      DO j=0,ny
         WRITE(*,'(12f8.3)') fgrid(:,j)
      END DO
      WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
   END IF
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'MBESS', mbes)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Setup the spline interpolation
   ii=1           ! Start with first derivative
   DO i = 1, nidbas(1)/2
      ibc1(1,i) = ii+i-1
      ibc1(2,i) = ii+i-1
   END DO
   ibc2 = ibc1
   CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .FALSE./),&
        &           ibc1=ibc1, ibc2=ibc2)
   WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
 !
 !   Compute spline interpolation coefficients
   CALL get_dim(splxy, dims)
   ALLOCATE(bcoefs(dims(1),dims(2)))
   WRITE(*,'(a,2i4)') 'Dims of spline', dims
 !
   ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
   fbc=0.0d0
 !
   WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
   WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
 !
   CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc, fbc2=fbc)
 !
   DEALLOCATE(fbc)
 !===========================================================================
 !              2.0 Check interpolation
 !
   dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
   dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
   DO i=1,nptx
      xpt(i) = xgrid(0) + (i-1)*dx
   END DO
   DO i=1,npty
      ypt(i) = ygrid(0) + (i-1)*dy
   END DO
   fexact = func(xpt,ypt)
   CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
   errs = fcalc-fexact
   WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
 !
   CALL putarr(fid, '/xpt', xpt, 'r')
   CALL putarr(fid, '/ypt', ypt, '\theta')
   CALL putarr(fid, '/bcoefs', bcoefs, 'bcoefs')
   CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
   CALL putarr(fid, '/fexact', fexact, 'Exact')
   CALL putarr(fid, '/errs', errs, 'Errors')
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoefs)
   DEALLOCATE(xgrid, ygrid, fgrid)
   CALL destroy_sp(splxy)
   CALL closef(fid)
 !
 !===========================================================================
 CONTAINS
   FUNCTION func(x,y)
 !
 !   A function with zeo derivatives at both ends
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = y(j)*y(j)*(y(j)-1.5d0)
        func(:,j) = x(:)*x(:)*(x(:)-1.5d0) + zy
     END DO
   END FUNCTION func
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
diff --git a/examples/fit2dbc_x.f90 b/examples/fit2dbc_x.f90
index 8795668..bf64d80 100644
--- a/examples/fit2dbc_x.f90
+++ b/examples/fit2dbc_x.f90
@@ -1,202 +1,202 @@
 !>
 !> @file fit2dbc_x.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit 2d grid value function to a 2d spline of any order
 !  BC using derivatives at both ends, in the non-periodic direction.
 !
 ! Testing BC on derivative along first direction
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   CHARACTER(len=128) :: file='fit2d.h5'
   INTEGER :: fid
   INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
   INTEGER, PARAMETER :: nptx=100, npty=100
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
   DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
   DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
   TYPE(spline2d) :: splxy
   DOUBLE PRECISION :: mem
   INTEGER :: i, j, ii
   INTEGER :: ibc(2,10)
   DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
 !
   NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
 
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intevals in x
   ny = 8              ! Number of intevals in y
   nidbas = (/3,3/)    ! Degree of splines
   mbes = 2
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
   fgrid = func(xgrid,ygrid)
 !
   WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
   IF( nx.LE.10 .AND. ny.LE.10 ) THEN
      WRITE(*,'(a)') 'FGRID(x, y)'
      DO j=0,ny
         WRITE(*,'(12f8.3)') fgrid(:,j)
      END DO
      WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
   END IF
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'MBESS', mbes)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Setup the spline interpolation
   ii=1           ! Start with first derivative
   DO i = 1, nidbas(1)/2
      ibc(1,i) = ii+i-1
      ibc(2,i) = ii+i-1
   END DO
   CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./),&
        &           ibc1=ibc)
   WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
 !
 !   Compute spline interpolation coefficients
   CALL get_dim(splxy, dims)
   ALLOCATE(bcoefs(dims(1),dims(2)))
   WRITE(*,'(a,2i4)') 'Dims of spline', dims
 !
   ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
   fbc=0.0d0
 !
 !!$!   Exact first derivatives at boundaries
 !!$  fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny))
 !!$  fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny))
 !
 !!$!   Derivatives at boundaries approximated with FD
 !!$  DO j=0,ny
 !!$     fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j)
 !!$     fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j)
 !!$  END DO
 !!$  fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0))
 !!$  fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1))
 !
   WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
   WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
 !
   CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc)
 !
   DEALLOCATE(fbc)
 !===========================================================================
 !              2.0 Check interpolation
 !
   dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
   dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
   DO i=1,nptx
      xpt(i) = xgrid(0) + (i-1)*dx
   END DO
   DO i=1,npty
      ypt(i) = ygrid(0) + (i-1)*dy
   END DO
   fexact = func(xpt,ypt)
   CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
   errs = fcalc-fexact
   WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
 !
   CALL putarr(fid, '/xpt', xpt, 'r')
   CALL putarr(fid, '/ypt', ypt, '\theta')
   CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
   CALL putarr(fid, '/fexact', fexact, 'Exact')
   CALL putarr(fid, '/errs', errs, 'Errors')
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoefs)
   DEALLOCATE(xgrid, ygrid, fgrid)
   CALL destroy_sp(splxy)
   CALL closef(fid)
 !
 !===========================================================================
 CONTAINS
   FUNCTION func(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * SIN(mbes*y(j))
        func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
     END DO
   END FUNCTION func
   FUNCTION func1(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func1(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * SIN(mbes*y(j))
        func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy
     END DO
   END FUNCTION func1
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
diff --git a/examples/fit2dbc_y.f90 b/examples/fit2dbc_y.f90
index ff73d65..08b4dcb 100644
--- a/examples/fit2dbc_y.f90
+++ b/examples/fit2dbc_y.f90
@@ -1,202 +1,202 @@
 !>
 !> @file fit2dbc_y.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Fit 2d grid value function to a 2d spline of any order
 !  BC using derivatives at both ends, in the non-periodic direction.
 !
 ! Testing BC on derivative along second direction
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   CHARACTER(len=128) :: file='fit2d.h5'
   INTEGER :: fid
   INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
   INTEGER, PARAMETER :: nptx=100, npty=100
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
   DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
   DOUBLE PRECISION, DIMENSION(npty,nptx) :: fcalc, fexact, errs
   TYPE(spline2d) :: splxy
   DOUBLE PRECISION :: mem
   INTEGER :: i, j, ii
   INTEGER :: ibc(2,10)
   DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
 !
   NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
 
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intevals in x
   ny = 8              ! Number of intevals in y
   nidbas = (/3,3/)    ! Degree of splines
   mbes = 2
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:ny,0:nx))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
   fgrid = TRANSPOSE(func(xgrid,ygrid))
 !
   WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
   IF( nx.LE.10 .AND. ny.LE.10 ) THEN
      WRITE(*,'(a)') 'FGRID(y, x)'
      DO j=0,ny
         WRITE(*,'(12f8.3)') fgrid(j,:)
      END DO
      WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
   END IF
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'MBESS', mbes)
 !===========================================================================
 !              2.0 Spline interpolation
 !
 !   Setup the spline interpolation
   ii=1           ! Start with first derivative
   DO i = 1, nidbas(1)/2
      ibc(1,i) = ii+i-1
      ibc(2,i) = ii+i-1
   END DO
   CALL set_splcoef((/nidbas(2), nidbas(1)/), ygrid, xgrid, splxy, (/.TRUE., .FALSE./),&
        &           ibc2=ibc)
   WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
 !
 !   Compute spline interpolation coefficients
   CALL get_dim(splxy, dims)
   ALLOCATE(bcoefs(dims(1),dims(2)))
   WRITE(*,'(a,2i4)') 'Dims of spline', dims
 !
   ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
   fbc=0.0d0
 !
 !!$!   Exact first derivatives at boundaries
 !!$  fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny))
 !!$  fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny))
 !
 !!$!   Derivatives at boundaries approximated with FD
 !!$  DO j=0,ny
 !!$     fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j)
 !!$     fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j)
 !!$  END DO
 !!$  fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0))
 !!$  fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1))
 !
   WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
   WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
 !
   CALL get_splcoef(splxy, fgrid, bcoefs, fbc2=fbc)
 !
   DEALLOCATE(fbc)
 !===========================================================================
 !              2.0 Check interpolation
 !
   dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
   dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
   DO i=1,nptx
      xpt(i) = xgrid(0) + (i-1)*dx
   END DO
   DO i=1,npty
      ypt(i) = ygrid(0) + (i-1)*dy
   END DO
   fexact = TRANSPOSE(func(xpt,ypt))
   CALL gridval(splxy, ypt, xpt, fcalc, (/0,0/), bcoefs)
   errs = fcalc-fexact
   WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
 !
   CALL putarr(fid, '/xpt', xpt, 'r')
   CALL putarr(fid, '/ypt', ypt, '\theta')
   CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
   CALL putarr(fid, '/fexact', fexact, 'Exact')
   CALL putarr(fid, '/errs', errs, 'Errors')
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoefs)
   DEALLOCATE(xgrid, ygrid, fgrid)
   CALL destroy_sp(splxy)
   CALL closef(fid)
 !
 !===========================================================================
 CONTAINS
   FUNCTION func(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * SIN(mbes*y(j))
        func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
     END DO
   END FUNCTION func
   FUNCTION func1(x,y)
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: func1(SIZE(x), SIZE(y))
     DOUBLE PRECISION :: zy
     INTEGER :: j
     DO j=1,SIZE(y)
        zy = -mbes * SIN(mbes*y(j))
        func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy
     END DO
   END FUNCTION func1
 !
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        sum2 = sum2 + x(i)**2
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 END PROGRAM main
diff --git a/examples/getgrad_perf.f90 b/examples/getgrad_perf.f90
index 0a168c9..79a22f0 100644
--- a/examples/getgrad_perf.f90
+++ b/examples/getgrad_perf.f90
@@ -1,221 +1,221 @@
 !>
 !> @file getgrad_perf.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test and compare performance of using "spline" and
 !   "pp" forms. 2D case
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, ngauss(2), nidbas(2), nits
   INTEGER :: npt, d1, d2
   INTEGER :: i, j, its, ngroup=4
   INTEGER :: i1, i2, nset, nremain
   DOUBLE PRECISION :: pi, dx, dy
   DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fgrid00(:), fgrid01(:), fgrid10(:)
   DOUBLE PRECISION, ALLOCATABLE :: fgrad00(:), fgrad01(:), fgrad10(:)
   TYPE(spline2d) :: splxy
 !
   NAMELIST /newrun/ nx, ny, nidbas, npt, nits
 !===============================================================================
 !                    0.0  Prologue
 !
 !   2D grid
 !
   nx = 8
   ny = 8
   nidbas = (/ 3, 3 /)
   npt = 100000
   nits =100
   READ(*,newrun)
   WRITE(*,newrun)
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   dy = 2.0d0*pi/REAL(ny)
   ygrid = (/ (j*dy,j=0,ny) /)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !
 !   Set up spline
 !
   ngauss = 4
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
   d1 = splxy%sp1%dim
   d2 = splxy%sp2%dim
   WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !
   ALLOCATE(xpt(npt), ypt(npt))
   CALL RANDOM_NUMBER(xpt)
   CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
 !
   ALLOCATE(coefs(d1,d2))
   ALLOCATE(fgrad00(npt), fgrad01(npt), fgrad10(npt))
   ALLOCATE(fgrid00(npt), fgrid01(npt), fgrid10(npt))
 !
 !===============================================================================
 !                      1.0 PPFORM
 !
   coefs = 1.0d0 ! => f=1, all derivatives = 0!
 !
   splxy%sp1%nlppform = .TRUE.
   splxy%sp2%nlppform = .TRUE.
   CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs)
 !
 !   Vector GRIDVAL
   WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
   ngroup = 1
   DO WHILE (ngroup.LT.npt/2)
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
      t0 = seconds()
      i2 = 0
      DO i=1,nset
         i1 = i2+1
         i2 = MIN(i2+ngroup,npt)
         DO its=1,nits
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/))
         END DO
      END DO
      t1 = seconds()-t0
      tvec = t1/REAL(npt*nits)
      IF(ngroup.EQ.1) tscal=tvec
      WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
      ngroup = 2*ngroup
   END DO
   WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL PPFORM: Max errors', &
        & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10))
 !
 !   Vector GETGRAD
   WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
   ngroup = 1
   DO WHILE (ngroup.LT.npt/2)
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
      t0 = seconds()
      i2 = 0
      DO i=1,nset
         i1 = i2+1
         i2 = MIN(i2+ngroup,npt)
         DO its=1,nits
            CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), &
                 &   fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2))
         END DO
      END DO
      t1 = seconds()-t0
      tvec = t1/REAL(npt*nits)
      IF(ngroup.EQ.1) tscal=tvec
      WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
      ngroup = 2*ngroup
   END DO
   WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD PPFORM: Max errors', &
        & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), &
        & MAXVAL(ABS(fgrad10-fgrid10))
 !===============================================================================
 !                      2.0 Spline expansion
 !
   coefs = 1.0d0 ! => f=1, all derivatives = 0!
 !
   splxy%sp1%nlppform = .FALSE.
   splxy%sp2%nlppform = .FALSE.
   CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs)
 !
 !   Vector GRIDVAL
   WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
   ngroup = 1
   DO WHILE (ngroup.LT.npt/2)
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
      t0 = seconds()
      i2 = 0
      DO i=1,nset
         i1 = i2+1
         i2 = MIN(i2+ngroup,npt)
         DO its=1,nits
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/))
         END DO
      END DO
      t1 = seconds()-t0
      tvec = t1/REAL(npt*nits)
      IF(ngroup.EQ.1) tscal=tvec
      WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
      ngroup = 2*ngroup
   END DO
   WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL SPLINE: Max errors', &
        & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10))
 !
 !   Vector GETGRAD
   WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
   ngroup = 1
   DO WHILE (ngroup.LT.npt/2)
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
      t0 = seconds()
      i2 = 0
      DO i=1,nset
         i1 = i2+1
         i2 = MIN(i2+ngroup,npt)
         DO its=1,nits
            CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), &
                 &   fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2))
         END DO
      END DO
      t1 = seconds()-t0
      tvec = t1/REAL(npt*nits)
      IF(ngroup.EQ.1) tscal=tvec
      WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
      ngroup = 2*ngroup
   END DO
   WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD SPLINE: Max errors', &
        & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), &
        & MAXVAL(ABS(fgrad10-fgrid10))
 !===============================================================================
 
 !
 !   Clean up
 !
   CALL destroy_sp(splxy)
   DEALLOCATE(xgrid, ygrid, coefs)
   DEALLOCATE(xpt, ypt, fgrid00, fgrid01, fgrid10)
   DEALLOCATE(fgrad00, fgrad01, fgrad10)
 END PROGRAM main
diff --git a/examples/gridval_perf.f90 b/examples/gridval_perf.f90
index d098531..f8d7e5d 100644
--- a/examples/gridval_perf.f90
+++ b/examples/gridval_perf.f90
@@ -1,194 +1,194 @@
 !>
 !> @file gridval_perf.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test and compare performance of using "spline" and
 !   "pp" forms. 2D case
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, ngauss(2), nidbas(2), nits
   INTEGER :: npt, d1, d2
   INTEGER :: i, j, its, ngroup=4
   INTEGER :: i1, i2, nset, nremain
   DOUBLE PRECISION :: pi, dx, dy
   DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:)
   DOUBLE PRECISION, ALLOCATABLE :: fscal00(:), fscal01(:), fscal10(:)
   TYPE(spline2d) :: splxy
 !
   NAMELIST /newrun/ nx, ny, nidbas, npt, nits
 !===============================================================================
 !                    0.0  Prologue
 !
 !   2D grid
 !
   nx = 8
   ny = 8
   nidbas = (/ 3, 3 /)
   npt = 100000
   nits =100
   READ(*,newrun)
   WRITE(*,newrun)
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   dy = 2.0d0*pi/REAL(ny)
   ygrid = (/ (j*dy,j=0,ny) /)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !
 !   Set up spline
 !
   ngauss = 4
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
   d1 = splxy%sp1%dim
   d2 = splxy%sp2%dim
   WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !===============================================================================
 !                      1.0 PPFORM
 !
   ALLOCATE(xpt(npt), ypt(npt))
   CALL RANDOM_NUMBER(xpt)
   CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
 !
   ALLOCATE(coefs(d1,d2))
   ALLOCATE(fscal00(npt), fscal01(npt), fscal10(npt))
   ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt))
 !
   coefs = 1.0d0 ! => f=1, all derivatives = 0!
 !
   splxy%sp1%nlppform = .TRUE.
   splxy%sp2%nlppform = .TRUE.
   CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs)
 !
 !    Scalar PPFORM
   t0 = seconds()
   DO i=1,npt
      DO its=1,nits
         CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/))
         CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/))
         CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/))
      END DO
   END DO
   t1 = seconds()-t0
   tscal =  t1/REAL(npt*nits,8)
   WRITE(*,'(/a,3(1pe12.3))') 'Scalar PPFORM: Max errors', &
        & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal
 !
 !   Vector PPFORM
   WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
   ngroup = 1
   DO WHILE (ngroup.LT.npt/2)
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
      t0 = seconds()
      i2 = 0
      DO i=1,nset
         i1 = i2+1
         i2 = MIN(i2+ngroup,npt)
         DO its=1,nits
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/))
         END DO
      END DO
      t1 = seconds()-t0
      tvec = t1/REAL(npt*nits)
      WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
      ngroup = 2*ngroup
   END DO
   WRITE(*,'(/a,3(1pe12.3))') 'Vector PPFORM: Max errors', &
        & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10))
 !===============================================================================
 !                      2.0 Sline expansion
 !
   coefs = 1.0d0 ! => f=1, all derivatives = 0!
 !
   splxy%sp1%nlppform = .FALSE.
   splxy%sp2%nlppform = .FALSE.
   CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs)
 !
 !    Scalar SPLINE
   t0 = seconds()
   DO i=1,npt
      DO its=1,nits
         CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/))
         CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/))
         CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/))
      END DO
   END DO
   t1 = seconds()-t0
   tscal =  t1/REAL(npt*nits,8)
   WRITE(*,'(/a,3(1pe12.3))') 'Scalar SPLINE: Max errors', &
        & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal
 !
 !   Vector SPLINE
   WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
   ngroup = 1
   DO WHILE (ngroup.LT.npt/2)
      nset = npt/ngroup
      nremain = MODULO(npt, ngroup)
      IF(nremain.NE.0) nset = nset+1
      t0 = seconds()
      i2 = 0
      DO i=1,nset
         i1 = i2+1
         i2 = MIN(i2+ngroup,npt)
         DO its=1,nits
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/))
            CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/))
         END DO
      END DO
      t1 = seconds()-t0
      tvec = t1/REAL(npt*nits)
      WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
      ngroup = 2*ngroup
   END DO
   WRITE(*,'(/a,3(1pe12.3))') 'Vector SPLINE: Max errors', &
        & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10))
 !===============================================================================
 
 !
 !   Clean up
 !
   CALL destroy_sp(splxy)
   DEALLOCATE(xgrid, ygrid, coefs)
   DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10)
   DEALLOCATE(fscal00, fscal01, fscal10)
 END PROGRAM main
diff --git a/examples/gyro.f90 b/examples/gyro.f90
index d5a79e8..3c7cb9f 100644
--- a/examples/gyro.f90
+++ b/examples/gyro.f90
@@ -1,179 +1,179 @@
 !>
 !> @file gyro.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Gyro-average using splines.
 !   F(x)=cos(x) => \bar F(x,rho) = J_0(rho) cos(x)
 !
   USE bsplines
   USE futils
 !
   IMPLICIT NONE
   INTEGER, PARAMETER :: nx=10, nidbas=3, dim=nx+nidbas, npt=100, &
        &                nrho=21, nnq=5
   DOUBLE PRECISION :: xgrid(0:nx), fgrid(0:nx), coefs(dim)
   DOUBLE PRECISION :: xpt(npt), fcalc(npt), fexact(npt)
   DOUBLE PRECISION :: averf(0:nx), averfexact(0:nx)
   DOUBLE PRECISION, POINTER :: splines(:,:)
   TYPE(spline1d) :: spl
   DOUBLE PRECISION :: pi, twopi, dx, lperiod, dth
   DOUBLE PRECISION :: drho, rho(nrho), erraver(nrho,nnq)
   INTEGER :: i, j, nq(nnq)
   DOUBLE PRECISION :: dbesj0
 !
   CHARACTER(len=128) :: file='gyro.h5'
   INTEGER :: fid
 !
   INTERFACE
      SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf)
        USE bsplines
        TYPE(spline1d) :: spl
        DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho
        INTEGER, INTENT(in) :: nq
        DOUBLE PRECISION, INTENT(out) :: averf(0:)
      END SUBROUTINE gyro
   END INTERFACE
 !
   pi = 4.0d0*ATAN(1.0d0)
   twopi = 2.0d0*pi
 !
 !   Create hdf5 file
   CALL creatf(file, fid, 'gyro Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
 !
 !   Grid and function values
   dx = twopi/nx
   xgrid(0) = 0.0d0
   DO i=1,nx
      xgrid(i) = xgrid(0) + i*dx
   END DO
   lperiod = xgrid(nx)-xgrid(0)
   fgrid = func(xgrid)
 !
 !   Spline interpolation
   CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
   CALL get_splcoef(spl, fgrid, coefs)
   WRITE(*,'(a)') 'Spline coefficients'
   WRITE(*,'(i5,f12.4)') (i-1, coefs(i), i=1,dim)
 !
 !   Error of interpolation
   CALL RANDOM_NUMBER(xpt)
   xpt = twopi*xpt
   CALL gridval(spl, xpt, fcalc, 0, coefs)
   fexact = func(xpt)
 !!$  WRITE(*,'(a)') 'Interpolated and exact f'
 !!$  WRITE(*,'(3(1pe12.3))') (xpt(i), fcalc(i), fexact(i), i=1,npt)
   WRITE(*,'(a,1pe12.3)') 'Interpolation error', norm2(fcalc-fexact)
   CALL putarr(fid, '/X', xpt)
   CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
   CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
 !
 !   Gyro-averaged of F at grid points xgrid(0:nx-1)
   drho = 5.0d0/nrho
   DO i=1,nrho
      rho(i) = i*drho
      DO j=1,nnq
         nq(j) = 2**(j+1)
         CALL gyro(spl, xgrid, coefs, rho(i), nq(j), averf)
         averfexact = dbesj0(rho(i))*COS(xgrid)
         erraver(i,j) = norm2(averfexact-averf)
      END DO
   END DO
 !
   WRITE(*,'(a,f8.3,i5)') 'averaged f at rho, nq =', rho(nrho), nq(nnq)
   WRITE(*,'(3(1pe12.3))') (xgrid(i),averf(i),averfexact(i), i=0,nx)
   CALL putarr(fid, '/XGRID', xgrid)
   CALL putarr(fid, '/AVERF', averf, 'Averaged F')
   CALL putarr(fid, '/AVERFEXACT', averfexact, 'Averaged F exact')
 !
   CALL putarr(fid, '/RHO', rho)
   CALL putarr(fid, '/NQ', nq)
   CALL putarr(fid, '/ERRAVER', erraver)
 !
 !   Clean up
   CALL destroy_sp(spl)
   CALL closef(fid)
 CONTAINS
   FUNCTION func(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: func(SIZE(x))
     func = COS(x)
   END FUNCTION func
   FUNCTION norm2(x)
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:)
     norm2 = SQRT(DOT_PRODUCT(x,x))
   END FUNCTION norm2
 END PROGRAM main
 !
 SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf)
 !
 !  Gyro-average using spline SPL and NQ point quadratire for
 !  theta-integration.
 !
   USE bsplines
   IMPLICIT NONE
   TYPE(spline1d) :: spl
   DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho
   INTEGER, INTENT(in) :: nq
   DOUBLE PRECISION, INTENT(out) :: averf(0:)
 !
   DOUBLE PRECISION :: th(nq), wth(nq), xq(nq)
   DOUBLE PRECISION, ALLOCATABLE :: avermat(:,:)
   DOUBLE PRECISION :: pi, twopi, lperiod, dth
   DOUBLE PRECISION, POINTER :: splines(:,:)
   INTEGER :: i, j, iq, nx, dim
 !
   pi = 4.0d0*ATAN(1.0d0)
   twopi = 2.0d0*pi
 !
 !   Quadrature in theta
   dth = twopi/nq
   th(1) = -pi + dth/2.0d0
   DO iq=2,nq
      th(iq) = th(iq-1)+dth
   END DO
   wth = dth
 !
 !   Gyro-averaging matrix
   CALL get_dim(spl, dim, nx)
   lperiod = xgrid(nx)-xgrid(0)
   ALLOCATE(avermat(0:nx,dim))
   DO i=0,nx
      xq = xgrid(i) + rho*COS(th)
      xq = xgrid(0) + MODULO(xq-xgrid(0), lperiod)
      CALL allsplines(spl, xq, splines)
      DO j=1,dim
         avermat(i,j) = DOT_PRODUCT(wth, splines(:,j))/twopi
      END DO
   END DO
 !
 !   Gyro-averaged of F at grid points xgrid(0:nx)
   averf = MATMUL(avermat, coefs)
 !
   DEALLOCATE(avermat)
 END SUBROUTINE gyro
diff --git a/examples/ibcmat.f90 b/examples/ibcmat.f90
index d48ea56..12ec56f 100644
--- a/examples/ibcmat.f90
+++ b/examples/ibcmat.f90
@@ -1,176 +1,176 @@
 !>
 !> @file ibcmat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
   USE matrix
   IMPLICIT NONE
   TYPE(gbmat), INTENT(inout) :: mat
   INTEGER, INTENT(in) :: ny
   INTEGER :: kl, ku, nrank, i, j
   DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
   INTEGER :: i0, ii
   INTEGER :: i0_arr(ny), col(ny)
 !===========================================================================
 !              1.0 Prologue
 !
 
   kl = mat%kl
   ku = mat%ku
   nrank = mat%rank
   ALLOCATE(zsum(nrank), arr(nrank))
 !
   i0 = nrank - ku
   WRITE(*,'(a,i6)') 'Estimated i0', i0
   DO i=1,ny
      arr = 0.0d0
      col(i) = nrank-ny+i
      CALL getcol(mat, nrank-ny+i, arr)
      DO ii=1,nrank
         i0_arr(i)=ii
         IF(arr(ii) .NE. 0.0d0) EXIT
      END DO
   END DO
 !!$  WRITE(*,'(a/(10i6))') 'col', col
 !!$  WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr
 !
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
   zsum = 0.0d0
   DO i=1,ny
      arr = 0.0d0
      CALL getrow(mat, i, arr)
      DO j=1,ny+ku
         zsum(j) = zsum(j) + arr(j)
      END DO
   END DO
   CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
   zsum = 0.0d0
   DO j=1,ny
      arr = 0.0d0
      CALL getcol(mat, j, arr)
      DO i=ny,ny+kl
         zsum(i) = zsum(i) + arr(i)
      END DO
   END DO
   CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
   DO j = 1,ny-1
      arr = 0.0d0; arr(j) = 1.0d0
      CALL putcol(mat, j, arr)     
   END DO
 !
   DO i = 1,ny-1
      arr = 0.0d0; arr(i) = 1.0d0
      CALL putrow(mat, i, arr)     
   END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
   DO j = nrank, nrank-ny+1, -1
      arr = 0.0d0; arr(j) = 1.0d0
      CALL putcol(mat, j, arr)     
   END DO
 !
   DO i = nrank, nrank-ny+1, -1
      arr = 0.0d0; arr(i) = 1.0d0
      CALL putrow(mat, i, arr)     
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(zsum, arr)
 !
 END SUBROUTINE ibcmat
 !+++
 SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(inout) :: rhs(:)
   INTEGER, INTENT(in) :: ny
   INTEGER :: nrank
   DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
   nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
   zsum = SUM(rhs(1:ny))
   rhs(ny) = zsum
   rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
   rhs(nrank-ny+1:nrank) = 0.0d0
 END SUBROUTINE ibcrhs
 !+++
 SUBROUTINE ibcrhs3(rhs, ny)
 !
 !   Apply BC on RHS
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
   INTEGER, INTENT(in) :: ny
   INTEGER :: nrank, nz, k
   DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
   nrank = SIZE(rhs,1)
   nz = SIZE(rhs,2)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
   DO k=1,nz
      zsum = SUM(rhs(1:ny,k))
      rhs(ny,k) = zsum
      rhs(1:ny-1,k) = 0.0d0
   END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
   DO k=1,nz
      rhs(nrank-ny+1:nrank,k) = 0.0d0
   END DO
 END SUBROUTINE ibcrhs3
diff --git a/examples/mesh.f90 b/examples/mesh.f90
index aa102a9..a532f08 100644
--- a/examples/mesh.f90
+++ b/examples/mesh.f90
@@ -1,66 +1,66 @@
 !>
 !> @file mesh.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Equidistant and non-equidistant mesh
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER :: i, nx
   DOUBLE PRECISION :: coefs(5), dev, dx
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
 !
   INTERFACE
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx,  coefs
 !
   nx = 8     ! Number oh intevals in x
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev)
   PRINT*, 'dev =', dev
 !
   dx = 1.0d0/REAL(nx,8)
   xgrid = (/ (i*dx, i=0,nx) /)
   PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev)
   PRINT*, 'dev =', dev
 !
   DEALLOCATE(xgrid)
 END PROGRAM main
diff --git a/examples/meshdist.f90 b/examples/meshdist.f90
index a20c2fd..f6e24f8 100644
--- a/examples/meshdist.f90
+++ b/examples/meshdist.f90
@@ -1,82 +1,82 @@
 !>
 !> @file meshdist.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct a 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 
diff --git a/examples/moments.f90 b/examples/moments.f90
index 0cb925b..ac4b874 100644
--- a/examples/moments.f90
+++ b/examples/moments.f90
@@ -1,228 +1,228 @@
 !>
 !> @file moments.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !   Compute moments of f(x), using its Spline representation
 !
 MODULE globals
   USE bsplines
   USE matrix
   IMPLICIT NONE
   DOUBLE PRECISION, PARAMETER   :: pi = 3.14159265359d0
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), rhs(:), sol(:)
   DOUBLE PRECISION, ALLOCATABLE :: finteg(:,:), moms(:)
   TYPE(spline1d), SAVE :: splx
   TYPE(gbmat), SAVE    :: mat
 CONTAINS
 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
     TYPE(spline1d), INTENT(in) :: spl
     TYPE(gbmat), INTENT(inout) :: mat
     INTEGER :: nrank, nx, nidbas, ngauss
     INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
     DOUBLE PRECISION :: contrib
 !
     INTEGER :: kterms                ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl, nrank, nx, nidbas)
     ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO i=1,nx
        CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
        DO igauss=1,ngauss
           CALL basfun(xgauss(igauss), spl, fun, i)
           CALL coefeq(xgauss(igauss), idert, iderw, coefs)
           DO iterm=1,kterms
              DO jt=0,nidbas
                 DO iw=0,nidbas
                    contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                         &    fun(iw,iderw(iterm)) * wgauss(igauss)
                    irow=i+iw; jcol=i+jt
                    CALL updtmat(mat, irow, jcol, contrib)
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
     DEALLOCATE(iderw, idert, coefs)
 !
   CONTAINS
     SUBROUTINE coefeq(x, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x
       INTEGER, INTENT(out) :: idt(:), idw(:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Mass matrix
 !
       c(1) = 1.d0
       idt(1) = 0
       idw(1) = 0
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
     TYPE(spline1d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: nrank, nx, nidbas, ngauss
     INTEGER :: i, igauss
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl, nrank, nx, nidbas)
 !
     ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,nx
        CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
        DO igauss=1,ngauss
           CALL basfun(xgauss(igauss), spl, fun, i)
           contrib = wgauss(igauss)*rhseq(xgauss(igauss))
           rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(v)
       DOUBLE PRECISION, INTENT(in) :: v
       rhseq = SQRT(1.0d0/(2.0d0*pi)) * EXP(-0.5d0*v**2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 END MODULE globals
 !
 !================================================================================
 PROGRAM main
   USE bsplines
   USE globals
   IMPLICIT NONE
   INTEGER          :: nidbas, nx, nmoms, ngauss, rank, kl, ku
   INTEGER          :: i
   DOUBLE PRECISION :: a, b, dx
 !
 !   Input
 !
   WRITE(*,'(a)') 'Enter, nidbas, a, b, nx, nmoms'
   READ(*,*) nidbas, a, b, nx, nmoms
 !
 !   Equidistant mesh
 !
   ALLOCATE(xgrid(0:nx))
   dx = (b-a)/REAL(nx)
   DO i=0, nx
      xgrid(i) = a + i*dx
   END DO
   WRITE(*,'(a/(8(1pe12.4)))') 'XGRID', xgrid
 !
 !   Set up spline
 !
   ngauss = nidbas+1
   CALL set_spline(nidbas, ngauss, xgrid, splx)
 !
 !    Mass matrix
 !
   CALL get_dim(splx, rank)       ! Rank of the FE Mass matrix
   kl = nidbas
   ku = kl
   CALL init(kl, ku, rank, 1, mat)
   WRITE(*,'(a,3i6)') 'kl, ku, rank', kl, ku, rank
   CALL dismat(splx, mat)
 !
 !   RHS
 !
   ALLOCATE(rhs(rank), sol(rank))
   CALL disrhs(splx, rhs)
 !
 !   Solve for Spline coefs
 !
   CALL factor(mat)
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(a/(8(1pe12.4)))') 'SOL', sol
   WRITE(*,'(a,1pe20.12)') ' Integral of sol using FINTG', fintg(splx, sol)
 !
 !   Moments
 !
   ALLOCATE(finteg(rank,0:nmoms), moms(0:nmoms))
   CALL calc_integ(splx, finteg)
   DO i=0,nmoms
      moms(i) = DOT_PRODUCT(sol(:), finteg(:,i))
   END DO
   WRITE(*,'(a,i3)') 'Moments of orders from 0 to', nmoms
   WRITE(*,'(8(1pe20.12))') moms
 !
   DEALLOCATE(finteg, moms)
   DEALLOCATE(rhs, sol)
   DEALLOCATE(xgrid)
   CALL destroy(mat)
   CALL destroy_sp(splx)
 END PROGRAM main
diff --git a/examples/optim1.f90 b/examples/optim1.f90
index 8db612e..017aead 100644
--- a/examples/optim1.f90
+++ b/examples/optim1.f90
@@ -1,138 +1,138 @@
 !>
 !> @file optim1.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test and compare performance of using "spline" and
 !   "pp" forms. 1D case
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, nidbas, nrank, npt=1000000
   INTEGER :: i
   DOUBLE PRECISION :: dx
   DOUBLE PRECISION :: seconds, t0, t1
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, coefs
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fpt0(:), fpt1(:), fun(:, :)
   INTEGER, ALLOCATABLE :: left(:)
   TYPE(spline1d) :: splx
 !
   NAMELIST /newrun/ nx, nidbas, npt
 !===============================================================================
 !
 !   1D grid
 !
   nx = 10
   nidbas = 3
   npt = 1000000
   READ(*,newrun)
   WRITE(*,newrun)
 
   ALLOCATE(xgrid(0:nx))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, 4, xgrid, splx)
   nrank = splx%dim
   WRITE(*,'(a, i5)') 'nrank =', nrank
   WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
 !
   ALLOCATE(xpt(npt))
   ALLOCATE(left(npt))
   ALLOCATE(fun(0:nidbas,0:1))  ! Values and first derivatives of all Splines
   CALL RANDOM_NUMBER(xpt)
   CALL locintv(splx, xpt, left)
 !===============================================================================
 !
 !   Check def_basfun_opt
 !
   CALL basfun_recur(xpt(101), splx, fun, left(101)+1)
   WRITE(*,'(/a,f20.15,i4/(2f20.15))') 'BASFUN_RECUR at X=', xpt(101), left(101),&
        & (fun(:,i), i=0,1)
 !
 !!$  CALL def_basfun(xpt(101), splx, fun)
   CALL basfun(xpt(101), splx, fun, left(101)+1)
   WRITE(*,'(/a,f20.15/(2f20.15))') 'DEF_BASFUN at X=', xpt(101), &
        & (fun(:,i), i=0,1)
 !
 !   Performance of basis function computations
 !
   t0 = seconds()
   DO i=1,npt
      CALL basfun_recur(xpt(i), splx, fun, left(i)+1)
   END DO
   WRITE(*,'(/a,1pe12.3)') 'BASFUN_RECUR time (s)', (seconds()-t0)/REAL(npt)
 !
   t0 = seconds()
   DO i=1,npt
 !!$     CALL def_basfun(xpt(i), splx, fun)
      CALL basfun(xpt(i), splx, fun, left(i)+1)
   END DO
   WRITE(*,'(/a,1pe12.3)') 'DEF_BASFUN time (s)', (seconds()-t0)/REAL(npt)
 !===============================================================================
 !
 !   Check and performance of GRIDVAL using PPFORM and SPLINE expansion
 !
   ALLOCATE(coefs(nrank))
   DEALLOCATE(xpt)
   ALLOCATE(xpt(npt), fpt0(npt), fpt1(npt))
   CALL RANDOM_NUMBER(xpt)
 !
   splx%nlppform = .TRUE.
   coefs = 1.0d0
 !
   CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs)
 !
   t0 = seconds()
   CALL gridval(splx, xpt, fpt1, 1)
   CALL gridval(splx, xpt, fpt0, 0)
   t1 = seconds()-t0
   WRITE(*,'(/a,2(1pe12.3))') 'PPFORM: Max errors', &
        & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
 !
   splx%nlppform = .FALSE.
   coefs = 1.0d0
   CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs)
   t0 = seconds()
   CALL gridval(splx, xpt, fpt1, 1)
   CALL gridval(splx, xpt, fpt0, 0)
   t1 = seconds()-t0
   WRITE(*,'(/a,2(1pe12.3))') 'SPLINES: Max errors', &
        & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
 !===============================================================================
 !
 !   Clean up
 !
   CALL destroy_sp(splx)
   DEALLOCATE(xgrid, coefs)
   DEALLOCATE(xpt, fpt0, fpt1)
   DEALLOCATE(fun)
 END PROGRAM main
diff --git a/examples/optim2.f90 b/examples/optim2.f90
index ffb9773..bf18a77 100644
--- a/examples/optim2.f90
+++ b/examples/optim2.f90
@@ -1,119 +1,119 @@
 !>
 !> @file optim2.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test and compare performance of using "spline" and
 !   "pp" forms. 2D case
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, ngauss(2), nidbas(2)
   INTEGER :: npt, d1, d2
   INTEGER :: i, j
   DOUBLE PRECISION :: pi, dx, dy
   DOUBLE PRECISION :: seconds, t0, t1
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:)
   TYPE(spline2d) :: splxy
 !
   NAMELIST /newrun/ nx, ny, nidbas, npt
 !===============================================================================
 !
 !   2D grid
 !
   nx = 8
   ny = 8
   nidbas = (/ 3, 3 /)
   npt = 1000000
   READ(*,newrun)
   WRITE(*,newrun)
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   dy = 2.0d0*pi/REAL(ny)
   ygrid = (/ (j*dy,j=0,ny) /)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !
 !   Set up spline
 !
   ngauss = 4
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
   d1 = splxy%sp1%dim
   d2 = splxy%sp2%dim
   WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !===============================================================================
 !
 !   Check and performance of GRIDVAL using PPFORM and SPLINE expansion
 !
   ALLOCATE(xpt(npt), ypt(npt))
   CALL RANDOM_NUMBER(xpt)
   CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
 !
   ALLOCATE(coefs(d1,d2))
   ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt))
 !
   coefs = 1.0d0
 !
   splxy%sp1%nlppform = .TRUE.
   splxy%sp2%nlppform = .TRUE.
   CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs)
 !
   t0 = seconds()
   CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/))
   CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/))
   CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/))
   t1 = seconds()-t0
   WRITE(*,'(/a,3(1pe12.3))') 'PPFORM: Max errors', &
        & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
 !
   splxy%sp1%nlppform = .FALSE.
   splxy%sp2%nlppform = .FALSE.
   CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs)
   t0 = seconds()
   CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/))
   CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/))
   CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/))
   t1 = seconds()-t0
   WRITE(*,'(/a,3(1pe12.3))') 'SPLINES: Max errors', &
        & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
 !===============================================================================
 !
 !   Clean up
 !
   CALL destroy_sp(splxy)
   DEALLOCATE(xgrid, ygrid, coefs)
   DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10)
 END PROGRAM main
diff --git a/examples/optim3.f90 b/examples/optim3.f90
index 9a617a0..5c50aba 100644
--- a/examples/optim3.f90
+++ b/examples/optim3.f90
@@ -1,137 +1,137 @@
 !>
 !> @file optim3.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test and compare performance of using "spline" and
 !   "pp" forms. 2D1D case
 !
   USE bsplines
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nz, ngauss(3), nidbas(3)
   INTEGER :: npt, d1, d2, d3
   INTEGER :: i, j, k
   DOUBLE PRECISION :: pi, dx, dy, dz
   DOUBLE PRECISION :: seconds, t0, t1
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: coefs
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), zpt(:)
   DOUBLE PRECISION, ALLOCATABLE :: fpt000(:), fpt100(:), fpt010(:), fpt001(:)
   TYPE(spline2d1d) :: splxyz
 !
   NAMELIST /newrun/ nx, ny, nz, nidbas, npt
 !===============================================================================
 !
 !   2D grid
 !
   nx = 8
   ny = 8
   nz = 8
   nidbas = (/ 3, 3, 3 /)
   npt = 1000000
   READ(*,newrun)
   WRITE(*,newrun)
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   dy = 2.0d0*pi/REAL(ny)
   ygrid = (/ (j*dy,j=0,ny) /)
   dz = 2.0d0*pi/REAL(nz)
   zgrid = (/ (k*dz,k=0,nz) /)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
   WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
 !
 !   Set up spline
 !
   ngauss = 4
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, &
        &         (/.FALSE., .TRUE., .TRUE./))
   d1 = splxyz%sp12%sp1%dim
   d2 = splxyz%sp12%sp2%dim
   d3 = splxyz%sp3%dim
   WRITE(*,'(a,3i4)') 'd1, d2, d3 =', d1, d2, d3
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxyz%sp12%sp1%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxyz%sp12%sp2%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots
 !===============================================================================
 !
 !   Check and performance of GRIDVAL using PPFORM and SPLINE expansion
 !
   ALLOCATE(xpt(npt), ypt(npt), zpt(npt))
   CALL RANDOM_NUMBER(xpt)
   CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
   CALL RANDOM_NUMBER(zpt); zpt = 2.0d0*pi*zpt
 !
   ALLOCATE(coefs(d1,d2,d3))
   ALLOCATE(fpt000(npt), fpt100(npt), fpt010(npt), fpt001(npt))
 !
   coefs = 1.0d0
 !
   splxyz%sp12%sp1%nlppform = .TRUE.
   splxyz%sp12%sp2%nlppform = .TRUE.
   splxyz%sp3%nlppform = .TRUE.
   CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs)
   t0 = seconds()
   CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/))
   CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/))
   CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/))
   CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/))
   t1 = seconds()-t0
   WRITE(*,'(/a,4(1pe12.3))') 'PPFORM: Max errors', &
        & MAXVAL(ABS(fpt000-1.0d0)), &
        & MAXVAL(ABS(fpt100)), &
        & MAXVAL(ABS(fpt010)), &
        & MAXVAL(ABS(fpt001))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
 !
   splxyz%sp12%sp1%nlppform = .FALSE.
   splxyz%sp12%sp2%nlppform = .FALSE.
   splxyz%sp3%nlppform = .FALSE.
   CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs)
   t0 = seconds()
   CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/))
   CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/))
   CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/))
   CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/))
   t1 = seconds()-t0
   WRITE(*,'(/a,4(1pe12.3))') 'SPLINES: Max errors', &
        & MAXVAL(ABS(fpt000-1.0d0)), &
        & MAXVAL(ABS(fpt100)), &
        & MAXVAL(ABS(fpt010)), &
        & MAXVAL(ABS(fpt001))
   WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
 !===============================================================================
 !
 !   Clean up
 !
   CALL destroy_sp(splxyz)
   DEALLOCATE(xgrid, ygrid, zgrid, coefs)
   DEALLOCATE(xpt, ypt, fpt000,fpt100, fpt010, fpt001)
 END PROGRAM main
diff --git a/examples/pde1d.f90 b/examples/pde1d.f90
index 86584b8..74fd2ea 100644
--- a/examples/pde1d.f90
+++ b/examples/pde1d.f90
@@ -1,419 +1,419 @@
 !>
 !> @file pde1d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
 !    exact solution: f(r) = 1 - r^k
 !
   USE bsplines
   USE matrix
   USE futils
   USE conmat_mod
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank, kl, ku
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
   TYPE(spline1d) :: splx
   TYPE(gbmat) :: mat
 !
   CHARACTER(len=128) :: file='pde1d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: solcal, solana, errsol
   DOUBLE PRECISION :: seconds, t0, tmat, tfact, tsolv
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(gbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
   CALL attach(fid, '/', 'KDIFF', kdiff)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
   CALL init(kl, ku, nrank, 1, mat)
   WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
 !!$  CALL dismat(splx, mat)
   CALL conmat(splx, mat, coefeq)
 !
   ALLOCATE(arr(nrank))
 !!$  WRITE(*,'(/a)') 'Matrice before BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
 !!$  END DO
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(kdiff, splx, rhs)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
 !
 !   Set BC f(r=1) = 0 on matrix
 !
   arr(1:nrank-1) = 0.0d0
   arr(nrank) = 1.0d0
   CALL putrow(mat, nrank, arr)
   CALL putcol(mat, nrank, arr)
   tmat = seconds() - t0
 !!$  WRITE(*,'(/a)') 'Matrice after BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr
 !!$  END DO
 !
 !   Set BC f(r=1) = 0 on RHS
 !
   rhs(nrank) = 0.0d0
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS after BC', rhs
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   CALL putarr(fid, '/MAT',  mat%val, 'GB matrice with BC')
   CALL attach(fid, '/MAT', 'KL', mat%kl)
   CALL attach(fid, '/MAT', 'KU', mat%ku)
   CALL attach(fid, '/MAT', 'RANK', mat%rank)
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
 
   t0 = seconds()
   CALL bsolve(mat, rhs, sol)
   tsolv = seconds() - t0
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'SOL', sol
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splx, sol), &
        &                    fintg(splx, sol)-REAL(kdiff,8)/REAL(kdiff+1,8)
 !===========================================================================
 !              4.0 Check the solution and its 1st derivative
 !
   ALLOCATE(solcal(0:nx,0:2), solana(0:nx,0:2), errsol(0:nx,0:2))
   DO i =0,nx
      solana(i,0) = 1.0d0-xgrid(i)**kdiff
      solana(i,1) = -kdiff*xgrid(i)**(kdiff-1)
      solana(i,2) = -kdiff*(kdiff-1)*xgrid(i)**(kdiff-2)
   END DO
   CALL gridval(splx, xgrid, solcal(:,0), 0, sol) ! Compute PPFORM and grid values
   CALL gridval(splx, xgrid, solcal(:,1), 1) ! 1st derivative
   CALL gridval(splx, xgrid, solcal(:,2), 2) ! 2nd derivative
   errsol = solana - solcal
 !
   CALL putarr(fid, '/XGRID', xgrid)
   CALL putarr(fid, '/SOLCAL', solcal)
   CALL putarr(fid, '/SOLANA', solana)
   CALL putarr(fid, '/ERR', errsol)
 !
   CALL creatg(fid, '/spline')
   CALL attach(fid, '/spline', 'order', splx%order)
   CALL putarr(fid, '/spline/knots', splx%knots, 'Spline knots')
   WRITE(*,'(a,3(1pe12.3))') 'Rel. discretization errors (solution and derivatives).', &
        &    (SQRT( DOT_PRODUCT(errsol(:,i),errsol(:,i)) / &
        &    DOT_PRODUCT(solana(:,i),solana(:,i)) ), i=0,2)
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(arr)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(gbmat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig.f90 b/examples/pde1d_eig.f90
index 698efea..4105b1b 100644
--- a/examples/pde1d_eig.f90
+++ b/examples/pde1d_eig.f90
@@ -1,459 +1,459 @@
 !>
 !> @file pde1d_eig.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
 !    exact solution: f(r) = 1 - r^k
 !
   USE bsplines
   USE matrix
   USE futils
   USE conmat_mod
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank, kl, ku
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
   TYPE(spline1d) :: splx
   TYPE(gemat) :: mat
 !!$  TYPE(gbmat) :: mat
 !
   CHARACTER(len=128) :: file='pde1d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
 !
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
   INTEGER :: j, info
 !
   INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
        &                lworkl=maxncv*(maxncv+8), zero=0.0d0
   DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
        &              d(maxncv,2), resid(maxn), w(maxn), &
        &              tol=0.0d0, sigma
   DOUBLE PRECISION, EXTERNAL :: dnrm2
   INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
        &     mode1=1, ierr
   INTEGER :: ido, ipntr(11), iparam(11)
   
   CHARACTER(len=1) :: bmat='I'
   CHARACTER(len=2) :: which='SA'
   LOGICAL :: rvec, select(maxncv)
   
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(gbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
   CALL attach(fid, '/', 'KDIFF', kdiff)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
 !!$  CALL init(kl, ku, nrank, 1, mat)
   WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
   CALL init(nrank, 1, mat)
 !!$  CALL dismat(splx, mat)
   CALL conmat(splx, mat, coefeq)
 !
   ALLOCATE(arr(nrank))
 !!$  WRITE(*,'(/a)') 'Matrice before BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
 !!$  END DO
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(kdiff, splx, rhs)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
 !
 !   Set BC f(r=1) = 0 on matrix
 !
   arr(1:nrank-1) = 0.0d0
   arr(nrank) = 1.0d0
   CALL putrow(mat, nrank, arr)
   CALL putcol(mat, nrank, arr)
   tmat = seconds() - t0
 !
 !===========================================================================
 !              3.0  Eigevalue problem
 !
 !   Using Lapack dsyev
 !
   t0 = seconds()
   ALLOCATE(mata(nrank,nrank))
   ALLOCATE(eigvals(nrank))
   ALLOCATE(work(3*nrank))
   mata=0.0d0
   DO j=1,nrank
      mata(j:nrank,j) = mat%val(j:nrank,j)
   END DO
   CALL putarr(fid, '/MAT', mata, 'matrix A')
   CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
   teig = seconds() - t0
   PRINT*,'Info from DSYEV', info, arr(1)
   IF(info.EQ.0) THEN
      CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
      CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
      WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
   END IF
 !
 !   Using Arpack
 !
   ido = 0
   iparam(1) = ishfts
   iparam(3) = maxitr
   iparam(7) = mode1
 !
   IF(nev.GT.0) THEN
      t0 = seconds()
      DO   ! ARPACK reverse communication loop 
         CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
              &      ncv, v, maxn, iparam, ipntr, workd, workl,&
              &      lworkl, info)
         IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
 !!$           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
            CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
            CYCLE
         END IF
         IF(info .LT. 0) THEN
            PRINT*, 'Error in _saupd with info =', info
         ELSE
            rvec = .TRUE.
            CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
                 &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
                 &      iparam, ipntr, workd, workl, lworkl, ierr )
            IF( ierr .NE. 0 ) THEN
               PRINT*,'Error in _seupd with ierr =', ierr
            ELSE
               nconv = iparam(5)
               PRINT*, '--- eigenvalues and error ---'
               DO j=1,nconv ! Residual norms
 !!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
 !!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
 !!$              d(j,2) = dnrm2(nrank, w, 1)
 !!$              d(j,2) = d(j,2)/abs(d(j,1))
                  WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
 !!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
 !!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
               END DO
               EXIT
            END IF
         END IF
      END DO   ! End of ARPACK reverse communication loop
      tarpack = seconds()-t0
   END IF
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
   WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(arr)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
 CONTAINS
   SUBROUTINE av(n,v,w)
 !
 ! Matrix vector product: w <- Av
     INTEGER, INTENT(in) :: n
     DOUBLE PRECISION, INTENT(in) :: v(*)
     DOUBLE PRECISION, INTENT(out) :: w(*)
     w(1:n) = vmx(mat,v(1:n))
   END SUBROUTINE av
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(gbmat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_csr.f90 b/examples/pde1d_eig_csr.f90
index 02996ca..d8e4067 100644
--- a/examples/pde1d_eig_csr.f90
+++ b/examples/pde1d_eig_csr.f90
@@ -1,469 +1,469 @@
 !>
 !> @file pde1d_eig_csr.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
 !    exact solution: f(r) = 1 - r^k
 !
   USE bsplines
   USE csr
   USE futils
   USE conmat_mod
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank, kl, ku
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
   TYPE(spline1d) :: splx
   TYPE(csr_mat) :: mat
 !!$  TYPE(gemat) :: mat
 !!$  TYPE(gbmat) :: mat
 !
   CHARACTER(len=128) :: file='pde1d.h5'
   INTEGER :: fid, ffid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
 !
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
   INTEGER :: j, info
 !
   INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
        &                lworkl=maxncv*(maxncv+8), zero=0.0d0
   DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
        &              d(maxncv,2), resid(maxn), w(maxn), &
        &              tol=0.0d0, sigma
   DOUBLE PRECISION, EXTERNAL :: dnrm2
   INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
        &     mode1=1, ierr
   INTEGER :: ido, ipntr(11), iparam(11)
   
   CHARACTER(len=1) :: bmat='I'
   CHARACTER(len=2) :: which='SA'
   LOGICAL :: rvec, select(maxncv)
   
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(gbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
   CALL attach(fid, '/', 'KDIFF', kdiff)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
   WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
   CALL init(nrank, 1, mat)
 !!$  CALL dismat(splx, mat)
   CALL conmat(splx, mat, coefeq)
   CALL to_mat(mat)
   CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File')
   PRINT*, 'rank', mat%rank
   PRINT*, 'nnz', mat%nnz
   PRINT*, 'irow', mat%irow
   PRINT*, 'cols', mat%cols
   CALL putmat(ffid,'/MAT',mat,'FE matrix')
   CALL closef(ffid)
 !
 !
   ALLOCATE(arr(nrank))
 !!$  WRITE(*,'(/a)') 'Matrice before BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
 !!$  END DO
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(kdiff, splx, rhs)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
 !!$!
 !!$!   Set BC f(r=1) = 0 on matrix
 !!$!
 !!$  arr(1:nrank-1) = 0.0d0
 !!$  arr(nrank) = 1.0d0
 !!$  CALL putrow(mat, nrank, arr)
 !!$  CALL putcol(mat, nrank, arr)
   CALL putmat(fid,'/MATA', mat, 'FE matrix')
   tmat = seconds() - t0
 !
 !===========================================================================
 !              3.0  Eigevalue problem
 !
 !   Using Lapack dsyev
 !
   t0 = seconds()
   ALLOCATE(mata(nrank,nrank))
   ALLOCATE(eigvals(nrank))
   ALLOCATE(work(3*nrank))
   mata=0.0d0
   DO j=1,nrank
      CALL getcol(mat, j, mata(:,j))  ! mata is a dense matrix
   END DO
   CALL putarr(fid, '/MAT', mata, 'matrix A')
   CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
   teig = seconds() - t0
   PRINT*,'Info from DSYEV', info, arr(1)
   IF(info.EQ.0) THEN
      CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
      CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
      WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
   END IF
 !
 !   Using Arpack
 !
   ido = 0
   iparam(1) = ishfts
   iparam(3) = maxitr
   iparam(7) = mode1
 !
   IF(nev.GT.0) THEN
      t0 = seconds()
      DO   ! ARPACK reverse communication loop 
         CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
              &      ncv, v, maxn, iparam, ipntr, workd, workl,&
              &      lworkl, info)
         IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
 !!$           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
            CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
            CYCLE
         END IF
         IF(info .LT. 0) THEN
            PRINT*, 'Error in _saupd with info =', info
         ELSE
            rvec = .TRUE.
            CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
                 &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
                 &      iparam, ipntr, workd, workl, lworkl, ierr )
            IF( ierr .NE. 0 ) THEN
               PRINT*,'Error in _seupd with ierr =', ierr
            ELSE
               nconv = iparam(5)
               PRINT*, '--- eigenvalues and error ---'
               DO j=1,nconv ! Residual norms
 !!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
 !!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
 !!$              d(j,2) = dnrm2(nrank, w, 1)
 !!$              d(j,2) = d(j,2)/abs(d(j,1))
                  WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
 !!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
 !!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
               END DO
               EXIT
            END IF
         END IF
      END DO   ! End of ARPACK reverse communication loop
      tarpack = seconds()-t0
   END IF
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
   WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(arr)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
 CONTAINS
   SUBROUTINE av(n,v,w)
 !
 ! Matrix vector product: w <- Av
     INTEGER, INTENT(in) :: n
     DOUBLE PRECISION, INTENT(in) :: v(*)
     DOUBLE PRECISION, INTENT(out) :: w(*)
     w(1:n) = vmx(mat,v(1:n))
   END SUBROUTINE av
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(gbmat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_gb.f90 b/examples/pde1d_eig_gb.f90
index c80f22d..4b1224f 100644
--- a/examples/pde1d_eig_gb.f90
+++ b/examples/pde1d_eig_gb.f90
@@ -1,460 +1,460 @@
 !>
 !> @file pde1d_eig_gb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
 !    exact solution: f(r) = 1 - r^k
 !
   USE bsplines
   USE matrix
   USE futils
   USE conmat_mod
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank, kl, ku
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
   TYPE(spline1d) :: splx
   TYPE(gemat) :: mat
 !!$  TYPE(gbmat) :: mat
 !
   CHARACTER(len=128) :: file='pde1d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
 !
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
   INTEGER :: j, info
 !
   INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
        &                lworkl=maxncv*(maxncv+8), zero=0.0d0
   DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
        &              d(maxncv,2), resid(maxn), w(maxn), &
        &              tol=0.0d0, sigma
   DOUBLE PRECISION, EXTERNAL :: dnrm2
   INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
        &     mode1=1, ierr
   INTEGER :: ido, ipntr(11), iparam(11)
   
   CHARACTER(len=1) :: bmat='I'
   CHARACTER(len=2) :: which='SA'
   LOGICAL :: rvec, select(maxncv)
   
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(gbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
   CALL attach(fid, '/', 'KDIFF', kdiff)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
 !!$  CALL init(kl, ku, nrank, 1, mat)
 !!$  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
   CALL init(nrank, 1, mat)
 !!$  CALL dismat(splx, mat)
   CALL conmat(splx, mat, coefeq)
 !
   ALLOCATE(arr(nrank))
 !!$  WRITE(*,'(/a)') 'Matrice before BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
 !!$  END DO
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(kdiff, splx, rhs)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
 !
 !   Set BC f(r=1) = 0 on matrix
 !
   arr(1:nrank-1) = 0.0d0
   arr(nrank) = 1.0d0
   CALL putrow(mat, nrank, arr)
   CALL putcol(mat, nrank, arr)
   tmat = seconds() - t0
 !
 !===========================================================================
 !              3.0  Eigevalue problem
 !
 !   Using Lapack dsyev
 !
   t0 = seconds()
   ALLOCATE(mata(nrank,nrank))
   ALLOCATE(eigvals(nrank))
   ALLOCATE(work(3*nrank))
   mata=0.0d0
   DO j=1,nrank
      mata(j:nrank,j) = mat%val(j:nrank,j)
   END DO
      CALL putarr(fid, '/MAT', mata, 'matrix A')
   CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
   teig = seconds() - t0
   PRINT*,'Info from DSYEV', info, arr(1)
   IF(info.EQ.0) THEN
      CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
      CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
      WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
   END IF
 !
 !   Using Arpack
 !
   ido = 0
   iparam(1) = ishfts
   iparam(3) = maxitr
   iparam(7) = mode1
 !
   IF(nev.GT.0) THEN
      t0 = seconds()
      DO   ! ARPACK reverse communication loop 
         CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
              &      ncv, v, maxn, iparam, ipntr, workd, workl,&
              &      lworkl, info)
         IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
            PRINT*, 'Error in _saupd with info =', info
            WRITE(*,'(a/(10i4))') 'ipntr', ipntr
            CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
            CYCLE
         END IF
         IF(info .LT. 0) THEN
            PRINT*, 'Error in _saupd with info =', info
         ELSE
            rvec = .TRUE.
            CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
                 &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
                 &      iparam, ipntr, workd, workl, lworkl, ierr )
            IF( ierr .NE. 0 ) THEN
               PRINT*,'Error in _seupd with ierr =', ierr
            ELSE
               nconv = iparam(5)
               PRINT*, '--- eigenvalues and error ---'
               DO j=1,nconv ! Residual norms
 !!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
 !!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
 !!$              d(j,2) = dnrm2(nrank, w, 1)
 !!$              d(j,2) = d(j,2)/abs(d(j,1))
               WRITE(*,'(i3,2(1pe12.4))') j, d(j,1), eigvals(j)-d(j,1)
 !!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
 !!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
               END DO
               EXIT
            END IF
         END IF
      END DO   ! End of ARPACK reverse communication loop
      tarpack = seconds()-t0
   END IF
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
   WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(arr)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
 CONTAINS
   SUBROUTINE av(n,v,w)
 !
 ! Matrix vector product: w <- Av
     INTEGER, INTENT(in) :: n
     DOUBLE PRECISION, INTENT(in) :: v(*)
     DOUBLE PRECISION, INTENT(out) :: w(*)
     w(1:n) = vmx(mat,v(1:n))
   END SUBROUTINE av
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(gbmat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_ge.f90 b/examples/pde1d_eig_ge.f90
index 713a66e..a36f688 100644
--- a/examples/pde1d_eig_ge.f90
+++ b/examples/pde1d_eig_ge.f90
@@ -1,474 +1,474 @@
 !>
 !> @file pde1d_eig_ge.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
 !    exact solution: f(r) = 1 - r^k
 !
   USE bsplines
   USE matrix
   USE futils
   USE conmat_mod
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank, kl, ku
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
   TYPE(spline1d) :: splx
   TYPE(gemat) :: mat
 !
   CHARACTER(len=128) :: file='pde1d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
 !
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvecsr(:,:), eigvecsl(:,:), &
        &                           wr(:), wi(:), work(:)
   INTEGER :: j, info
 !
   INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=maxn
   DOUBLE PRECISION :: v(maxn,maxncv),workd(3*maxn), workev(3*maxncv), &
        &              d(maxncv,2), resid(maxn), w(maxncv,maxn), &
        &              zero=0.0d0, tol=0.0d0, sigmar, sigmai
   DOUBLE PRECISION, ALLOCATABLE :: workl(:)
   DOUBLE PRECISION, EXTERNAL :: dnrm2
   INTEGER :: nev=10, ncv=30, ishfts=1, maxitr=300, nconv, &
        &     mode1=1, ierr, lworkl
   INTEGER :: ido, ipntr(11), iparam(11)
   
   CHARACTER(len=1) :: bmat='I'
   CHARACTER(len=2) :: which='SA'
   LOGICAL :: rvec, select(maxncv)
   
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(gbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, &
        &            nev, ncv, which
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
   CALL attach(fid, '/', 'KDIFF', kdiff)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
 !!$  CALL init(kl, ku, nrank, 1, mat)
   WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
   CALL init(nrank, 1, mat)
 !!$  CALL dismat(splx, mat)
   CALL conmat(splx, mat, coefeq)
 !
   ALLOCATE(arr(nrank))
 !!$  WRITE(*,'(/a)') 'Matrice before BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
 !!$  END DO
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(kdiff, splx, rhs)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
 !
 !   Set BC f(r=1) = 0 on matrix
 !
   arr(1:nrank-1) = 0.0d0
   arr(nrank) = 1.0d0
   CALL putrow(mat, nrank, arr)
   CALL putcol(mat, nrank, arr)
   tmat = seconds() - t0
 !
 !===========================================================================
 !              3.0  Eigevalue problem
 !
 !   Using Lapack dgeev
 !
   t0 = seconds()
   ALLOCATE(mata(nrank,nrank))
   ALLOCATE(eigvecsr(nrank,nrank))
   ALLOCATE(eigvecsl(nrank,nrank))
   ALLOCATE(work(4*nrank))
   ALLOCATE(wr(nrank), wi(nrank))
   mata(:,:) = mat%val(:,:)
   CALL putarr(fid, '/MAT', mata, 'matrix A')
 !!$  CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
   CALL dgeev('N', 'V', nrank, mata, nrank, wr, wi, eigvecsl, SIZE(eigvecsl,1), &
        &     eigvecsr, SIZE(eigvecsr,1), work, SIZE(work), info)
   teig = seconds() - t0
   PRINT*,'Info from DGEEV', info, arr(1)
   IF(info.EQ.0) THEN
      CALL putarr(fid, '/REIGVS', wr, 'Real of eigenvalues of A')
      CALL putarr(fid, '/IEIGVS', wi, 'Imag of eigenvalues of A')
      CALL putarr(fid, '/EIGVECL', eigvecsl, 'left eigenvalues of A')
      CALL putarr(fid, '/EIGVECR', eigvecsr, 'right eigenvalues of A')
      WRITE(*,'(a/(10f10.4))') 'Real eigval', wr
      WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi
   END IF
 !
 !   Using Arpack
 !
   ido = 0
   iparam(1) = ishfts
   iparam(3) = maxitr
   iparam(7) = mode1
 !
  lworkl  = 3*ncv**2+6*ncv
  ALLOCATE(workl(lworkl))
   
 !
   t0 = seconds()
   DO   ! ARPACK reverse communication loop 
      CALL dnaupd(ido, bmat, nrank, which, nev, tol, resid, &
           &      ncv, v, maxn, iparam, ipntr, workd, workl,&
           &      lworkl, info)
      IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
         CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
         CYCLE
      END IF
      PRINT*, 'INFO =', info
      IF(info .LT. 0) THEN
         PRINT*, 'Error in dnaupd with info =', info
      ELSE
         rvec = .TRUE.
         CALL dneupd(rvec, 'A', select, d, d(1,2), v, size(v,1), &
              &      sigmar, sigmai, workev, bmat, nrank, which, nev, tol, &
              &      resid, ncv, v, size(v,1), iparam, ipntr, workd, workl, &
              &      lworkl, ierr )
         IF( ierr .NE. 0 ) THEN
            PRINT*,'Error in dneupd with ierr =', ierr
         ELSE
            nconv = iparam(5)
            PRINT*, '--- Real eigenvalues and comprison with Lapack results ---'
 ! eiegvalues and diff with Lapack results
            DO j=1,nconv
               WRITE(*,'(i3,3(1pe12.4))') j, d(j,1), wr(j), wr(j)-d(j,1)
            END DO
            PRINT*, '--- Imag eigenvalues and comprison with Lapack results ---'
            DO j=1,nconv
               WRITE(*,'(i3,3(1pe12.4))') j, d(j,2), wi(j), wi(j)-d(j,2)
            END DO
 !!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
 !!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
 !!$              d(j,2) = dnrm2(nrank, w, 1)
 !!$              d(j,2) = d(j,2)/abs(d(j,1))
            CALL putarr(fid, '/EIGVS', d(1:nconv,:), 'ARPACK eigenvalues of A')
 !!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
 !!$           END DO
         END IF
         EXIT
      END IF
   END DO   ! End of ARPACK reverse communication loop
   tarpack = seconds()-t0
   
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
   WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(arr)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
 CONTAINS
   SUBROUTINE av(n,v,w)
 !
 ! Matrix vector product: w <- Av
     INTEGER, INTENT(in) :: n
     DOUBLE PRECISION, INTENT(in) :: v(*)
     DOUBLE PRECISION, INTENT(out) :: w(*)
     w(1:n) = vmx(mat,v(1:n))
   END SUBROUTINE av
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(gbmat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_zcsr.f90 b/examples/pde1d_eig_zcsr.f90
index d467456..eaaebe4 100644
--- a/examples/pde1d_eig_zcsr.f90
+++ b/examples/pde1d_eig_zcsr.f90
@@ -1,481 +1,481 @@
 !>
 !> @file pde1d_eig_zcsr.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
 !    exact solution: f(r) = 1 - r^k
 !
   USE bsplines
   USE csr
   USE futils
   USE f95_precision, ONLY: WP => DP
   USE lapack95, ONLY: geev
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank, kl, ku
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
   TYPE(spline1d) :: splx
   TYPE(zcsr_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde1d.h5'
   INTEGER :: fid, ffid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
 !
 !   Lapack95 GEEV arguments
   COMPLEX(WP), ALLOCATABLE :: mata(:,:), w(:)
   REAL(WP), ALLOCATABLE    :: WR(:), WI(:)
   INTEGER :: j, info
 !
   INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25
   INTEGER :: lworkl, zero=0.0d0
   DOUBLE PRECISION :: d(maxncv,2), tol=0.0d0, rwork(maxncv)
   DOUBLE COMPLEX :: v(maxn,maxncv), resid(maxn), sigma
   DOUBLE COMPLEX :: workd(3*maxncv), lwork(2*maxn)
   DOUBLE COMPLEX, ALLOCATABLE :: workl(:), vl(:,:), vr(:,:)
   DOUBLE PRECISION, EXTERNAL :: dnrm2
   INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
        &     mode1=1, ierr
   INTEGER :: ido, ipntr(14), iparam(11)
   
   CHARACTER(len=1) :: bmat='I'
   CHARACTER(len=2) :: which='SA'
   LOGICAL :: rvec, select(maxncv)
   
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE csr
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(zcsr_mat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1D Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
   CALL attach(fid, '/', 'KDIFF', kdiff)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
   WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
   CALL init(nrank, 1, mat)
   CALL dismat(splx, mat)
 !!$  CALL conmat(splx, mat, coefeq)
   CALL to_mat(mat)
   PRINT*,'MAT after to_mat', mat%val
   CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File', real_prec='D')
   PRINT*, 'rank', mat%rank
   PRINT*, 'nnz', mat%nnz
   PRINT*, 'irow', mat%irow
   PRINT*, 'cols', mat%cols
   CALL putmat(ffid,'/MAT',mat,'FE matrix')
   PRINT*, 'MAT',mat%val
   CALL closef(ffid)
   STOP
 !
 !
   ALLOCATE(arr(nrank))
 !!$  WRITE(*,'(/a)') 'Matrice before BC'
 !!$  DO i=1,nrank
 !!$     CALL getrow(mat, i, arr)
 !!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
 !!$  END DO
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(kdiff, splx, rhs)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
 !!$!
 !!$!   Set BC f(r=1) = 0 on matrix
 !!$!
 !!$  arr(1:nrank-1) = 0.0d0
 !!$  arr(nrank) = 1.0d0
 !!$  CALL putrow(mat, nrank, arr)
 !!$  CALL putcol(mat, nrank, arr)
   CALL putmat(fid,'/MATA', mat, 'FE matrix')
   tmat = seconds() - t0
 !
 !===========================================================================
 !              3.0  Eigevalue problem
 !
 !   Using Lapack dsyev
 !
   t0 = seconds()
   ALLOCATE(mata(nrank,nrank))
   ALLOCATE(w(nrank))
   ALLOCATE(wr(nrank), wi(nrank))
   ALLOCATE(vl(nrank,nrank), vr(nrank,nrank))
   mata=0.0d0
   DO j=1,nrank
      CALL getcol(mat, j, mata(:,j))  ! convert to dense matrix mata
   END DO
   CALL putarr(fid, '/MAT', mata, 'matrix A')
   CALL geev(mata, w)
   wr(:) = REAL(w(:))
   wi(:) = AIMAG(w(:))
   teig = seconds() - t0
   PRINT*,'Info from ZGEEV', info, arr(1)
   IF(info.EQ.0) THEN
      CALL putarr(fid, '/REIGVS', wr, 'eigenvalues of A')
      CALL putarr(fid, '/IEIGVS', wi, 'eigenvectors of A')
      WRITE(*,'(a/(10f10.4))') 'Real eigval', wr
      WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi
   END IF
 !
 !   Using Arpack
 !
   ido = 0
   iparam(1) = ishfts
   iparam(3) = maxitr
   iparam(7) = mode1
 !
   lworkl  = 3*ncv**2+5*ncv
   ALLOCATE(workl(lworkl))
 !
   IF(nev.GT.0) THEN
      t0 = seconds()
      DO   ! ARPACK reverse communication loop 
         CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, ncv, &
              &      v, SIZE(v,1), iparam, ipntr, workd, workl, lworkl, &
              &      rwork, info)
         IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
 !!$           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
            CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
            CYCLE
         END IF
         IF(info .LT. 0) THEN
            PRINT*, 'Error in _saupd with info =', info
         ELSE
            rvec = .TRUE.
            CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
                 &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
                 &      iparam, ipntr, workd, workl, lworkl, ierr )
            IF( ierr .NE. 0 ) THEN
               PRINT*,'Error in _seupd with ierr =', ierr
            ELSE
               nconv = iparam(5)
               PRINT*, '--- eigenvalues and error ---'
               DO j=1,nconv ! Residual norms
 !!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
 !!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
 !!$              d(j,2) = dnrm2(nrank, w, 1)
 !!$              d(j,2) = d(j,2)/abs(d(j,1))
                  WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
 !!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
 !!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
               END DO
               EXIT
            END IF
         END IF
      END DO   ! End of ARPACK reverse communication loop
      tarpack = seconds()-t0
   END IF
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
   WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(arr)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
 CONTAINS
   SUBROUTINE av(n,v,w)
 !
 ! Matrix vector product: w <- Av
     INTEGER, INTENT(in) :: n
     DOUBLE COMPLEX, INTENT(in) :: v(*)
     DOUBLE COMPLEX, INTENT(out) :: w(*)
     w(1:n) = vmx(mat,v(1:n))
   END SUBROUTINE av
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE csr
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(zcsr_mat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX                :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_zmumps.f90 b/examples/pde1d_eig_zmumps.f90
index ec17841..644f680 100644
--- a/examples/pde1d_eig_zmumps.f90
+++ b/examples/pde1d_eig_zmumps.f90
@@ -1,460 +1,460 @@
 !>
 !> @file pde1d_eig_zmumps.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 1d differential  eqation using splines:
 !
 !  Solve the standard eigenvalue:
 !      A*x = \lambda *x or inv(A)*x = 1/\lambda * x using Arpack and MUMPS.
 !   where A is obtained from discretozation of
 !     -d/dr[r d/dr] f = k^2 r^(k-1)
 !
   USE bsplines
   USE mumps_bsplines
   USE futils
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, kdiff
   INTEGER :: i, nrank
   LOGICAL :: nlppform
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
   TYPE(spline1d) :: splx
   TYPE(zmumps_mat) :: mat
 !
   INTEGER :: ierr
   INTEGER :: fid
   CHARACTER(len=32) :: str
   DOUBLE PRECISION :: seconds, t0, tmat, tfac, tarpack
 !
 !   Arpack:  Solve the standard eigenvalue problem
 !
   INTEGER :: nev = 10, ncv = 10
   LOGICAL :: nlinv = .FALSE.  ! Solve inv(A) = 1/\lambda * x if nlinv=.TRUE.
   CHARACTER(len=2) :: which='SM'
   INTEGER :: info=0   ! Use random vector to start the Arnoldi iterations
   INTEGER :: ido=0    ! Reverse communications
   LOGICAL :: rvec
   LOGICAL, ALLOCATABLE :: select(:)
   INTEGER :: iparam(11), ipntr(14), nconv
   DOUBLE PRECISION :: tol=0.0d0
   CHARACTER(len=1) :: bmat='I'
 !
   INTEGER :: lworkl
   DOUBLE COMPLEX, ALLOCATABLE :: workl(:), workd(:), workev(:)
   DOUBLE COMPLEX, ALLOCATABLE :: eig_vals(:), eig_vecs(:,:), resid(:)
   DOUBLE COMPLEX :: sigma
   DOUBLE PRECISION, ALLOCATABLE :: rwork(:)
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE mumps_bsplines
        TYPE(spline1d), INTENT(in) :: spl
        TYPE(zmumps_mat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(kdiff, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: kdiff
        TYPE(spline1d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, &
        &            nev, ncv, nlinv, which, tol
 !===========================================================================
 !              1.0 Prologue
 !
   CALL mpi_init(ierr)
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   kdiff = 2  ! Exponent of differential problem
   nlppform = .TRUE. ! Use PPFORM for gridval or not
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
   CALL get_dim(splx, nrank)       ! Rank of the FE matrix
   WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
 !
 !   FE matrix assembly
 !
   WRITE(*,'(a,i6)') ' nrank', nrank
   CALL init(nrank, 1, mat)
   CALL dismat(splx, mat)
   CALL to_mat(mat)
   tmat = seconds() - t0
 !
   mat%mumps_par%IRN => mat%mumps_par%IRN_loc ! Work around for single proc.
   PRINT*, 'nnz_loc', mat%nnz_loc
   PRINT*, 'mat%mumps_par%N', mat%mumps_par%N
   PRINT*, 'mat%mumps_par%NZ_loc', mat%mumps_par%NZ_loc
   PRINT*, 'size of mat%mumps_par%IRN', SIZE(mat%mumps_par%IRN)
   PRINT*, 'mat%istart,mat%iend', mat%istart,mat%iend
 !  
   CALL creatf('pde1d_eig_zmumps.h5', fid, 'PDE1D Result File', real_prec='d')
   PRINT*, 'rank', mat%rank
   PRINT*, 'nnz', mat%nnz
 !
   CALL putmat(fid,'/MAT',mat,'FE matrix')
 !
   IF(nlinv) THEN
      t0 = seconds()
      CALL factor(mat)
      tfac = seconds()-t0
   END IF
 !===========================================================================
 !              3.0 Solve the standard eigenvalue problem
 !
   lworkl = 3*ncv**2 + 5*ncv
   ALLOCATE(workl(lworkl))
   ALLOCATE(workd(3*nrank))
   ALLOCATE(workev(2*ncv))
   ALLOCATE(eig_vals(ncv), eig_vecs(nrank,ncv))
   ALLOCATE(resid(nrank))
   ALLOCATE(rwork(ncv))
 !
   iparam(1) = 1   ! shfts
   iparam(3) = 300 ! Max. number of iterations
   iparam(7) = 1   ! Regular mode
 !
 ! The reverse communication loop
 !
   t0 = seconds()
   DO
      CALL znaupd  (ido, bmat, nrank, which, nev, tol, resid, ncv, &
           &        eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl, &
           &        rwork, info )
 !
      IF(ido .EQ. -1 .OR. ido .EQ. 1) THEN   ! Compute A*v
         CALL av(nrank, workd(ipntr(1)), workd(ipntr(2)))
         CYCLE
      END IF
 !
      IF(info .LT. 0) THEN  ! Error
         PRINT*, 'Error in _naupd with info =', info
         EXIT
      ELSE
         rvec = .TRUE.
         ALLOCATE(select(ncv))
         CALL zneupd  (rvec, 'A', select, eig_vals, eig_vecs, nrank, &
              &       sigma, workev, bmat, nrank, which, nev, tol, resid, &
              &       ncv, eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl,&
              &       rwork, ierr)
         IF(ierr .NE. 0) THEN
            PRINT*, 'Error in _neupd with ierr =', ierr
            EXIT
         ELSE
            nconv = iparam(5)
            PRINT*,'Number of converged eigenvalues', nconv
            IF(nlinv) THEN
               eig_vals(1:nconv) = (1.d0,0.0d0) / eig_vals(1:nconv)
            END IF
            WRITE(*,'(2(1pe12.3))') eig_vals(1:nconv)
            CALL putarr(fid, '/eig_vals', eig_vals(1:nconv))
            CALL putarr(fid, '/eig_vecs', eig_vecs(1:nrank,1:nconv))
            DO i=1,nconv
 !!$              WRITE(*,'(/a,2(pe20.6))') '*** eigen value =', eig_vals(i)
 !!$              WRITE(*,'(a/(10(1pe12.4)))') 'Real of eigen vector', &
 !!$                   &                        REAL(eig_vecs(1:nrank,i))
 !!$              WRITE(*,'(a/(10(1pe12.4)))') 'Imag of eigen vector', &
 !!$                   &                        aimag(eig_vecs(1:nrank,i))
               WRITE(str,'(a,i3.3)') '/eig_vecs_',i
               CALL putarr(fid, TRIM(str), eig_vecs(1:nrank,i))
            END DO
            EXIT
         END IF
      END IF
   END DO  ! End of reverse commuinication loop
   IF(info .EQ. 1) THEN
      PRINT*, 'Maximum number of iterations reached!'
      PRINT*, 'IPARAM(5) =', iparam(5)
   END IF
   PRINT*, 'Number of Arnoldi iterations', iparam(3)
   tarpack = seconds() - t0
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s)  ', tmat
   IF(nlinv) THEN
      WRITE(*,'(a,1pe12.3)') 'Matrice factorization time (s) ', tfac
   END IF
   WRITE(*,'(a,1pe12.3)') 'Arpack time (s)                ', tarpack
 !
   DEALLOCATE(xgrid)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 !
 CONTAINS
   SUBROUTINE av(n,v,w)
 !
     INTEGER, INTENT(in) :: n
     DOUBLE COMPLEX, INTENT(in) :: v(*)
     DOUBLE COMPLEX, INTENT(out)   :: w(*)
 !
     IF(nlinv) THEN
        w(1:n) = v(1:n)
        CALL bsolve(mat,w(1:n))  ! Solve A*w = v or w=inv(A)*v
     ELSE
        w(1:n) = vmx(mat,v(1:n)) ! A*v matrix product
     END IF
   END SUBROUTINE av
 !
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix mat using spline spl
 !
   USE bsplines
   USE mumps_bsplines
   IMPLICIT NONE
   TYPE(spline1d), INTENT(in) :: spl
   TYPE(zmumps_mat), INTENT(inout) :: mat
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX                :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=i+iw; jcol=i+jt
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Weak form = Int(x*dw/dx*dt/dx)dx
 !
     c(1) = x
     idt(1) = 1
     idw(1) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !+++
 SUBROUTINE disrhs(kdiff, spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: kdiff
   TYPE(spline1d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, left
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, nrank, nx, nidbas)
 !!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
 !
   ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
 !!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
 !!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         left = i-1
 !!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
         contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
         rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x,k)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: k
     rhseq = k*k*x**(k-1)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
diff --git a/examples/pde1dp.f90 b/examples/pde1dp.f90
index 7eb32bd..36c93d4 100644
--- a/examples/pde1dp.f90
+++ b/examples/pde1dp.f90
@@ -1,170 +1,170 @@
 !>
 !> @file pde1dp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   1D PDE with priodic BC
 !
   USE pde1dp_mod
   USE bsplines
   USE matrix
   USE futils
   USE conmat_mod
 !
   IMPLICIT NONE
   CHARACTER(len=128) :: file='pde1dp.h5'
   INTEGER :: fid
   INTEGER :: nx, nidbas, ngauss, ibcoef
   INTEGER :: nrank, kl, ku, dim
   DOUBLE PRECISION :: coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, sol, rhs
   INTEGER, PARAMETER :: npts=100
   DOUBLE PRECISION, DIMENSION(0:npts-1) :: xpts, frhs
   DOUBLE PRECISION :: dx, errmx
   DOUBLE PRECISION, ALLOCATABLE :: arr(:,:)
   TYPE(periodic_mat) :: mat
   INTEGER :: i, j
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, ibcoef, coefs
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8     ! Number oh intevals in x
   nidbas = 3 ! Degree of splines
   ngauss = 4 ! Number of Gauss points/interval
   ibcoef = 1 ! Index of non-zero spline coef for RHS
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = 0.0d0
   xgrid(nx) = 1.0d0
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1DP Result File')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL attach(fid, '/', 'NGAUSS', ngauss)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up periodic spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
   nrank = nx       ! Rank of the FE matrix
 !
 !   Mass matrix assembly
 !
   kl = nidbas
   ku = kl
   CALL init(kl, ku, nrank, 1, mat)
   CALL get_dim(splx, dim)
   WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim
 !!$  CALL dismat(splx, mat)
   CALL conmat(splx, mat, coefeq_mass)
 !
 !   Store matrix in hdf5 file
 !
   ALLOCATE(arr(nrank,nrank))
   DO j=1,nrank
      CALL getcol(mat, j, arr(:,j))
   END DO
   CALL putarr(fid, '/mata', arr)
   DEALLOCATE(arr)
 !
 !   Check RHS constructed using input spline coefs.
 !
   ALLOCATE(bcoef(0:dim-1))
   bcoef = 0.0d0; bcoef(ibcoef-1) = 1.0d0
 !
   DO i=nrank,dim-1  ! Periodicity to fill array of spline coefs
      bcoef(i) = bcoef(MODULO(i,nrank))
   END DO
   WRITE(*,'(/a/(10f8.3))') 'bcoef from input', bcoef
   dx = (xgrid(nx)-xgrid(0))/npts
   DO i=0,npts-1
      xpts(i) = xgrid(0) + i*dx
      frhs(i) = rhseq(xpts(i))
   END DO
   CALL creatg(fid, '/rhs')
   CALL putarr(fid,'/rhs/x', xpts)
   CALL putarr(fid,'/rhs/f', frhs)
 !
 !   Assembly RHS and check A*x = f, using method vmx
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(splx, rhs)
   sol = vmx(mat, bcoef(0:nrank-1))
   WRITE(*,'(/6x,3a12)') 'A*x', 'rhs', 'Err'
   errmx = 0.0d0
   DO i=1,nrank
      WRITE(*,'(i6,3(1pe12.3))') i, sol(i), rhs(i), sol(i)-rhs(i)
      errmx=MAX(errmx,ABS(sol(i)-rhs(i)))
   END DO
   WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx
 !
 !   Factor and solve
 !
   CALL factor(mat)
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(/6x,3a12)') 'Computed', 'Exact', 'Err'
   errmx = 0.0d0
   DO i=1,nrank
      WRITE(*,'(i6,3(1pe12.3))') i, sol(i), bcoef(i-1), sol(i)-bcoef(i-1)
      errmx=MAX(errmx,ABS(sol(i)-bcoef(i-1)))
   END DO
   WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx
 !===========================================================================
 !              9.0  Clean up
 !
   DEALLOCATE(xgrid)
   DEALLOCATE(bcoef)
   DEALLOCATE(rhs, sol)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)  
 CONTAINS
   SUBROUTINE coefeq_mass(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Mass matrix
 !
     c(1) = 1.0d0
     idt(1) = 0
     idw(1) = 0
   END SUBROUTINE coefeq_mass
 END PROGRAM main
diff --git a/examples/pde1dp_cmpl.f90 b/examples/pde1dp_cmpl.f90
index 48b2d1e..44ca51b 100644
--- a/examples/pde1dp_cmpl.f90
+++ b/examples/pde1dp_cmpl.f90
@@ -1,403 +1,403 @@
 !>
 !> @file pde1dp_cmpl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde1dp_cmpl_mod
   USE bsplines
   USE matrix
   IMPLICIT NONE
 !
 CONTAINS
   SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
 !
   TYPE(spline1d) :: spl
   INTEGER, INTENT(in) :: mmode
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
   DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
 !
   INTEGER :: dim, nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, it, irow
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   nrank = SIZE(rhs)
   CALL get_dim(spl, dim, nx, nidbas)
 !
   ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(:) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         contrib = wgauss(igauss) * rhseq(xgauss(igauss))
         DO it=0,nidbas
            irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
            rhs(irow) = rhs(irow) + contrib*fun(it,1)
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 CONTAINS
   DOUBLE COMPLEX FUNCTION rhseq(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: arg
     arg = mmode*x
     rhseq = (mmode**2*alpha-beta)*COS(arg)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat, alpha, beta)
 !
   TYPE(spline1d) :: spl
   TYPE(zperiodic_mat) :: mat
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
 !
   INTEGER :: dim, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, dim, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
                  jcol=MODULO(i+jt-1,nx) + 1
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
 !
     c(1) = alpha
     idt(1) = 1
     idw(1) = 1
 !
     c(2) = -beta
     idt(2) = 0
     idw(2) = 0
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE save_mat(fid, label, mat)
 !
 !   Save zperiodic_mat in dense format
 !
     USE futils
 !
     INTEGER, INTENT(in) :: fid
     CHARACTER(len=*), INTENT(in) :: label
     TYPE(zperiodic_mat) :: mat
     INTEGER :: j, n
     DOUBLE COMPLEX, ALLOCATABLE :: fullmat(:,:)
 !
     n=mat%mat%rank
     ALLOCATE(fullmat(n,n))
     DO j=1,n
        CALL getcol(mat, j, fullmat(:,j))
     END DO
     CALL putarr(fid, label, fullmat)
     DEALLOCATE(fullmat)
   END SUBROUTINE save_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of complex array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE COMPLEX, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
 !
     sum2 = DOT_PRODUCT(x,x)
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(mmode, x)
 !
 !   Construct a 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     INTEGER, INTENT(in) :: mmode
     DOUBLE PRECISION, INTENT(inout) :: x(0:)
     INTEGER :: nx, nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     nx = SIZE(x)-1
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = 2.0 + COS(mmode*x)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde1dp_cmpl_mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !
 PROGRAM main
 !
 !   1D complex PDE with periodic BC
 !
   USE pde1dp_cmpl_mod
   USE bsplines
   USE matrix
   USE futils
 !
   IMPLICIT NONE
   TYPE(spline1d) :: splx
   TYPE(zperiodic_mat) :: mat
   INTEGER :: kl, ku, nrank
 !
   CHARACTER(len=128) :: file='pde1dp_cmpl.h5'
   INTEGER :: fid
   INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
   LOGICAL :: nlequid
   DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
   DOUBLE PRECISION :: dx
   DOUBLE COMPLEX :: alpha, beta
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  sol, rhs, bcoef
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  solcal, solana
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: err_norm
   INTEGER :: i
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8             ! Number oh intevals in x
   nidbas = 3         ! Degree of splines
   ngauss = 4         ! Number of Gauss points/interval
   nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
   mmode = 1          ! Fourier mode
   alpha = (1.0, 1.0) ! Complex "diffusion"
   beta  = 1.0
   npt = 100
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   dx = 2.d0*pi/REAL(nx,8)
   xgrid = (/ (i*dx,i=0,nx) /)
   IF( .NOT. nlequid ) THEN
      CALL meshdist(mmode, xgrid)
   END IF
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL putarr(fid, '/xgrid', xgrid)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up periodic spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
   WRITE(*,'(a,l6)') 'nlequid =', nlequid
   nrank = nx       ! Rank of the FE matrix
 !
 !   FE matrix assembly
 !
   kl = nidbas
   ku = kl
   CALL init(kl, ku, nrank, 2, mat)
   CALL get_dim(splx, dim)
   WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim
   CALL dismat(splx, mat, alpha, beta)
 !
 !   RHS assembly
 !  
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(splx, rhs, mmode, alpha, beta)
 !
   CALL save_mat(fid, '/mat', mat)
   CALL putarr(fid, '/rhs', rhs)
 !
 !   Factor and solve
 !
   CALL factor(mat)
   CALL bsolve(mat, rhs, sol)
   CALL putarr(fid, '/sol', sol)
 !===========================================================================
 !              3.0   Check solution
 !
 !   Exact solution
   ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
   dx=2.0d0*pi/REAL(npt,8)
   x = (/ (i*dx, i=0,npt) /)
   solana = COS(mmode*x)
 !
 !   Prolongate solution using periodicity
 !
   ALLOCATE(bcoef(dim))
   bcoef(1:nrank) = sol(1:nrank)
   DO i=nrank+1,dim
      bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
   END DO
 !
 !   Interpolate field
 !
   CALL gridval(splx, x, solcal, 0, bcoef)
 !
   err = ABS(solcal-solana)
   CALL putarr(fid, '/x', x)
   CALL putarr(fid, '/solana', solana)
   CALL putarr(fid, '/solcal', solcal)
   CALL putarr(fid, '/err', err)
 !
 !   Compute discretization error norm by Gauss integration
 !
   err_norm=0.0
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
   DO i=1,nx
      CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
      CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
      solana(1:ngauss) = COS(mmode*xgauss)
      err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
           &          solana(1:ngauss)-solcal(1:ngauss))
      err_norm = err_norm + SUM(wgauss*err(1:ngauss))
   END DO
   err_norm = SQRT(err_norm)
   WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
 !
   DEALLOCATE(x, solcal, solana, err)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(bcoef)
 !===========================================================================
 !              9.0  Clean up
 !
   DEALLOCATE(xgrid)
   DEALLOCATE(rhs, sol)
   CALL destroy(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)  
 END PROGRAM main
diff --git a/examples/pde1dp_cmpl_dft.f90 b/examples/pde1dp_cmpl_dft.f90
index d866885..9fbf2dc 100644
--- a/examples/pde1dp_cmpl_dft.f90
+++ b/examples/pde1dp_cmpl_dft.f90
@@ -1,290 +1,290 @@
 !>
 !> @file pde1dp_cmpl_dft.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde1dp_cmpl_dft_mod
   USE bsplines
   IMPLICIT NONE
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat, alpha, beta)
 !
     USE bsplines
     TYPE(spline1d)             :: spl
     DOUBLE COMPLEX             :: mat(:)
     DOUBLE COMPLEX, INTENT(in) :: alpha, beta
 !
     INTEGER :: dim, nx, nidbas, ngauss, intv, igauss
     DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, dim, nx, nidbas)
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
   ALLOCATE(ft_fun(0:nx-1,2))  ! Up to first derivative
 !
 !   Weak form: integration on first interval
 !
   intv = 1
   CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
   mat = 0.0d0
   DO igauss=1,ngauss
      CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv)
      mat(:) = mat(:) + wgauss(igauss) * ( &
           &       alpha*ft_fun(:,2)*CONJG(ft_fun(:,2)) &
           &      - beta*ft_fun(:,1)*CONJG(ft_fun(:,1))  &
           &       )
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(ft_fun)
   DEALLOCATE(xgauss, wgauss)
 !    
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
 !
     TYPE(spline1d) :: spl
     INTEGER, INTENT(in) :: mmode
     DOUBLE COMPLEX, INTENT(in) :: alpha, beta
     DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
 !
     INTEGER :: dim, nrank, nx, nidbas, ngauss
     INTEGER :: i, igauss, it, irow
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
     DOUBLE COMPLEX :: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     nrank = SIZE(rhs)
     CALL get_dim(spl, dim, nx, nidbas)
 !
     ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     rhs(:) = 0.0d0
 !
     DO i=1,nx
        CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
        DO igauss=1,ngauss
           CALL basfun(xgauss(igauss), spl, fun, i)
           contrib = wgauss(igauss) * rhseq(xgauss(igauss))
           DO it=0,nidbas
              irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
              rhs(irow) = rhs(irow) + contrib*fun(it,1)
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
   CONTAINS
     DOUBLE COMPLEX FUNCTION rhseq(x)
       DOUBLE PRECISION, INTENT(in) :: x
       DOUBLE PRECISION :: arg
       arg = mmode*x
       rhseq = (mmode**2*alpha-beta)*COS(arg)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 END MODULE pde1dp_cmpl_dft_mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !
 PROGRAM main
 !
 !   1D complex PDE with periodic BC, using DFT
 !
   USE pde1dp_cmpl_dft_mod
   USE bsplines
   USE matrix
   USE futils
   USE fft
 !
   IMPLICIT NONE
   TYPE(spline1d) :: splx
   DOUBLE COMPLEX, ALLOCATABLE :: mat(:)
   INTEGER ::nrank
 !
   CHARACTER(len=128) :: file='pde1dp_cmpl_dft.h5'
   INTEGER :: fid
   INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
   DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
   DOUBLE PRECISION :: dx
   DOUBLE COMPLEX :: alpha, beta
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  sol, rhs, bcoef
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  sol_shifted, rhs_shifted
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  solcal, solana
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: err_norm
   INTEGER :: i
   INTEGER :: k, kmin, kmax
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, alpha, beta, mmode, npt
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8             ! Number oh intevals in x
   nidbas = 3         ! Degree of splines
   ngauss = 4         ! Number of Gauss points/interval
   mmode = 1          ! Fourier mode
   alpha = (1.0, 1.0) ! Complex "diffusion"
   beta  = 1.0
   npt = 100
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   dx = 2.d0*pi/REAL(nx,8)
   xgrid = (/ (i*dx,i=0,nx) /)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL putarr(fid, '/xgrid', xgrid)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up periodic spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
   nrank = nx       ! Rank of the FE matrix
   CALL get_dim(splx, dim)
   WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
 !
 !   Init DFT
   kmin = -nx/2
   kmax = nx/2-1
   CALL init_dft(splx, kmin, kmax)
 !
 !   FE matrix assembly in Fourier space
 !
   ALLOCATE(mat(0:nx-1))
   CALL dismat(splx, mat, alpha, beta)
   CALL putarr(fid, '/mat', mat)
 !
 !   RHS assembly in real space
 !  
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(splx, rhs, mmode, alpha, beta)
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
 !  Fourier solve
 !
   CALL putarr(fid, '/rhs', rhs)
 !
   CALL fourcol(rhs, 1)
 !
   ALLOCATE(rhs_shifted(kmin:kmax))
   ALLOCATE(sol_shifted(kmin:kmax))
   DO k=kmin,kmax
      rhs_shifted(k) = rhs(MODULO(k+nx,nx)+1)/REAL(nx,8)
   END DO
   sol_shifted = rhs_shifted / mat
   DO k=kmin,kmax
      sol(MODULO(k+nx,nx)+1) = sol_shifted(k)
   END DO
 !
   CALL putarr(fid, '/rhs_fft', rhs)
   CALL putarr(fid, '/sol_fft', sol)
 !
 !   Solution in real space
 !
   CALL fourcol(sol,-1)
   CALL putarr(fid, '/sol', sol)
 !===========================================================================
 !              4.0   Check solution
 !
 !   Exact solution
   ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
   dx=2.0d0*pi/REAL(npt,8)
   x = (/ (i*dx, i=0,npt) /)
   solana = COS(mmode*x)
 !
 !   Prolongate solution using periodicity
 !
   ALLOCATE(bcoef(dim))
   bcoef(1:nrank) = sol(1:nrank)
   DO i=nrank+1,dim
      bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
   END DO
 !
 !   Interpolate field
 !
   CALL gridval(splx, x, solcal, 0, bcoef)
 !
   err = ABS(solcal-solana)
   CALL putarr(fid, '/x', x)
   CALL putarr(fid, '/solana', solana)
   CALL putarr(fid, '/solcal', solcal)
   CALL putarr(fid, '/err', err)
 !
 !   Compute discretization error norm by Gauss integration
 !
   err_norm=0.0
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
   DO i=1,nx
      CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
      CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
      solana(1:ngauss) = COS(mmode*xgauss)
      err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
           &          solana(1:ngauss)-solcal(1:ngauss))
      err_norm = err_norm + SUM(wgauss*err(1:ngauss))
   END DO
   err_norm = SQRT(err_norm)
   WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
 !!
 !===========================================================================
 !              9.0  Clean up
 !
   DEALLOCATE(xgrid)
   DEALLOCATE(mat)
   CALL destroy_sp(splx)
   CALL closef(fid)  
 END PROGRAM main
diff --git a/examples/pde1dp_cmpl_mumps.f90 b/examples/pde1dp_cmpl_mumps.f90
index ab14f2a..ef2f1d8 100644
--- a/examples/pde1dp_cmpl_mumps.f90
+++ b/examples/pde1dp_cmpl_mumps.f90
@@ -1,478 +1,478 @@
 !>
 !> @file pde1dp_cmpl_mumps.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde1dp_cmpl_mumps_mod
   USE bsplines
   USE mumps_bsplines
   IMPLICIT NONE
 !
 CONTAINS
   SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
 !
   TYPE(spline1d) :: spl
   INTEGER, INTENT(in) :: mmode
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
   DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
 !
   INTEGER :: dim, nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, it, irow
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   nrank = SIZE(rhs)
   CALL get_dim(spl, dim, nx, nidbas)
 !
   ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(:) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         contrib = wgauss(igauss) * rhseq(xgauss(igauss))
         DO it=0,nidbas
            irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
            rhs(irow) = rhs(irow) + contrib*fun(it,1)
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 CONTAINS
   DOUBLE COMPLEX FUNCTION rhseq(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: arg
     arg = mmode*x
     rhseq = (mmode**2*alpha-beta)*COS(arg)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat, alpha, beta)
 !
   TYPE(spline1d) :: spl
   TYPE(zmumps_mat) :: mat
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
 !
   INTEGER :: dim, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
 !
   INTEGER :: istart, iend
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, dim, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   istart = mat%istart
   iend   = mat%iend
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO iw=0,nidbas
               irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
               IF( irow.GE.istart .AND. irow.LE.iend) THEN
                  DO jt=0,nidbas
                     contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                          &    fun(iw,iderw(iterm)) * wgauss(igauss)
                     jcol=MODULO(i+jt-1,nx) + 1
                     CALL updtmat(mat, irow, jcol, contrib)
                  END DO
               END IF
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
 !
     c(1) = alpha
     idt(1) = 1
     idw(1) = 1
 !
     c(2) = -beta
     idt(2) = 0
     idw(2) = 0
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of complex array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE COMPLEX, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
 !
     sum2 = DOT_PRODUCT(x,x)
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(mmode, x)
 !
 !   Construct a 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     INTEGER, INTENT(in) :: mmode
     DOUBLE PRECISION, INTENT(inout) :: x(0:)
     INTEGER :: nx, nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     nx = SIZE(x)-1
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = 2.0 + COS(mmode*x)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde1dp_cmpl_mumps_mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !
 PROGRAM main
 !
 !   1D complex PDE with periodic BC
 !
   USE pde1dp_cmpl_mumps_mod
   USE futils
 !
   IMPLICIT NONE
   INCLUDE 'mpif.h'
   TYPE(spline1d) :: splx
   TYPE(zmumps_mat) :: mat
   TYPE(zmumps_mat) :: newmat
   INTEGER :: kl, ku, nrank
 !
   CHARACTER(len=128) :: file='pde1dp_cmpl_mumps.h5'
   INTEGER :: fid
   INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
   LOGICAL :: nlequid
   LOGICAL :: nlsym, nlherm, nlpos
   DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
   DOUBLE PRECISION :: dx
   DOUBLE COMPLEX :: alpha, beta
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: err_norm
   INTEGER :: i
   INTEGER :: ierr, me
   INTEGER :: nzfact
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
        &            nlsym, nlherm, nlpos
 !===========================================================================
 !              1.0 Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Read in data specific to run
 !
   nx = 8             ! Number oh intevals in x
   nidbas = 3         ! Degree of splines
   ngauss = 4         ! Number of Gauss points/interval
   nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
   mmode = 1          ! Fourier mode
   alpha = (1.0, 1.0) ! Complex "diffusion"
   beta  = 1.0
   npt = 100
   nlsym = .TRUE.    ! Is matrice symmetric
   nlherm = .FALSE.  ! Is matrice hermitian
   nlpos = .TRUE.    ! and positive definite ?
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nidbas, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ngauss, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlequid, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(mmode, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(npt, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlherm, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   dx = 2.d0*pi/REAL(nx,8)
   xgrid = (/ (i*dx,i=0,nx) /)
   IF( .NOT. nlequid ) THEN
      CALL meshdist(mmode, xgrid)
   END IF
   IF(me.EQ.0) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Create hdf5 file
 !
   IF(me.EQ.0) THEN
      CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
      CALL attach(fid, '/', 'NX', nx)
      CALL attach(fid, '/', 'NIDBAS', nidbas)
      CALL putarr(fid, '/xgrid', xgrid)
   END IF
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up periodic spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
   CALL get_dim(splx, dim)
   nrank = nx       ! Rank of the FE matrix
 !
 !   FE matrix assembly
 !
   CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
   WRITE(*,'(a,i4.4,a,3i6)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
        &        mat%iend-mat%istart+1
 !
   CALL dismat(splx, mat, alpha, beta)
 !
 !   RHS assembly
 !  
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(splx, rhs, mmode, alpha, beta)
 !
   IF(me.EQ.0) CALL putarr(fid, '/rhs', rhs)
 !
 !   Factor and solve
 !
   CALL factor(mat, debug=.FALSE.)
   CALL bsolve(mat, rhs, sol, debug=.FALSE.)
 !
   nzfact = mat%mumps_par%INFOG(29)
   IF(nzfact<0) THEN
      nzfact = -nzfact*1000000
   END IF
   IF(me.EQ.0) THEN
      CALL putarr(fid, '/sol', sol)
      WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix       = ',get_count(mat)
      WRITE(*,'(a,i8)')  'Number of nonzeros in factors of A  = ',nzfact
 !
 !   Compute residue
 !
      WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
   END IF
 !===========================================================================
 !              3.0   Check solution
 !
   IF(me.EQ.0) THEN
 !
 !   Exact solution
      ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
      dx=2.0d0*pi/REAL(npt,8)
      x = (/ (i*dx, i=0,npt) /)
      solana = COS(mmode*x)
 !
 !   Prolongate solution using periodicity
 !
      ALLOCATE(bcoef(dim))
      bcoef(1:nrank) = sol(1:nrank)
      DO i=nrank+1,dim
         bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
      END DO
 !
 !   Interpolate field
 !
      CALL gridval(splx, x, solcal, 0, bcoef)
 !
      err = ABS(solcal-solana)
      CALL putarr(fid, '/x', x)
      CALL putarr(fid, '/solana', solana)
      CALL putarr(fid, '/solcal', solcal)
      CALL putarr(fid, '/err', err)
 !
 !   Compute discretization error norm by Gauss integration
 !
      err_norm=0.0
      ALLOCATE(xgauss(ngauss), wgauss(ngauss))
      DO i=1,nx
         CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
         CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
         solana(1:ngauss) = COS(mmode*xgauss)
         err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
              &          solana(1:ngauss)-solcal(1:ngauss))
         err_norm = err_norm + SUM(wgauss*err(1:ngauss))
      END DO
      err_norm = SQRT(err_norm)
      WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
   END IF
 !
 !===========================================================================
 !              4.0 Test of getrow/putrow, getcol/putcol and mcopy
 !
   CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
   ALLOCATE(arow(nrank), newsol(nrank))
 !
   DO i=1,nrank
      CALL getrow(mat, i, arow)
      CALL putrow(newmat, i, arow)
   END DO
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a)') 'putrow/getrow ...'
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
   WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
 !
   DO i=1,nrank
      CALL getcol(mat, i, arow)
      CALL putcol(newmat, i, arow)
   END DO
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a)') 'putcol/getcol ...'
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
   WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
 !
   CALL clear_mat(newmat)
   CALL mcopy(mat, newmat)
   WRITE(*,'(/a)') 'mcopy ...'
   newmat%val = (1000.0d0,0.0d0)*newmat%val
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(a)') 'Backsolve the new system'
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
 !
   WRITE(*,'(a)') 'Destroy NEWMAT ...'
   CALL destroy(newmat)
 !
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(/a)') 'Backsolve the old system'
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
 !
   WRITE(*,'(a)') 'Destroy MAT ...'
   CALL destroy(mat)
 !
 !!$  WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!'
 !!$  CALL bsolve(newmat, rhs, newsol)
 !===========================================================================
 !              9.0  Clean up
 !
   IF(me.EQ.0) THEN
      DEALLOCATE(x, solcal, solana, err)
      DEALLOCATE(bcoef)
      DEALLOCATE(xgauss, wgauss)
   END IF
   DEALLOCATE(xgrid)
   DEALLOCATE(rhs, sol)
   DEALLOCATE(arow, newsol)
   CALL destroy_sp(splx)
   IF(me.EQ.0) CALL closef(fid)  
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/examples/pde1dp_cmpl_pardiso.f90 b/examples/pde1dp_cmpl_pardiso.f90
index e3cf4a5..fccd3f6 100644
--- a/examples/pde1dp_cmpl_pardiso.f90
+++ b/examples/pde1dp_cmpl_pardiso.f90
@@ -1,457 +1,457 @@
 !>
 !> @file pde1dp_cmpl_pardiso.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde1dp_cmpl_pardiso_mod
   USE bsplines
   USE pardiso_bsplines
   IMPLICIT NONE
 !
 CONTAINS
   SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
 !
   TYPE(spline1d) :: spl
   INTEGER, INTENT(in) :: mmode
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
   DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
 !
   INTEGER :: dim, nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, it, irow
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   nrank = SIZE(rhs)
   CALL get_dim(spl, dim, nx, nidbas)
 !
   ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(:) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         contrib = wgauss(igauss) * rhseq(xgauss(igauss))
         DO it=0,nidbas
            irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
            rhs(irow) = rhs(irow) + contrib*fun(it,1)
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 CONTAINS
   DOUBLE COMPLEX FUNCTION rhseq(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: arg
     arg = mmode*x
     rhseq = (mmode**2*alpha-beta)*COS(arg)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat, alpha, beta)
 !
   TYPE(spline1d) :: spl
   TYPE(zpardiso_mat) :: mat
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
 !
   INTEGER :: dim, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, dim, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
                  jcol=MODULO(i+jt-1,nx) + 1
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
 !
     c(1) = alpha
     idt(1) = 1
     idw(1) = 1
 !
     c(2) = -beta
     idt(2) = 0
     idw(2) = 0
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of complex array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE COMPLEX, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
 !
     sum2 = DOT_PRODUCT(x,x)
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(mmode, x)
 !
 !   Construct a 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     INTEGER, INTENT(in) :: mmode
     DOUBLE PRECISION, INTENT(inout) :: x(0:)
     INTEGER :: nx, nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     nx = SIZE(x)-1
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = 2.0 + COS(mmode*x)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde1dp_cmpl_pardiso_mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !
 PROGRAM main
 !
 !   1D complex PDE with periodic BC
 !
   USE pde1dp_cmpl_pardiso_mod
   USE futils
   USE conmat_mod
 !
   IMPLICIT NONE
   TYPE(spline1d) :: splx
   TYPE(zpardiso_mat) :: mat
   TYPE(zpardiso_mat) :: newmat
   INTEGER :: kl, ku, nrank
 !
   CHARACTER(len=128) :: file='pde1dp_cmpl_pardiso.h5'
   INTEGER :: fid
   INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
   LOGICAL :: nlequid
   LOGICAL :: nlsym, nlherm, nlpos
   DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
   DOUBLE PRECISION :: dx
   DOUBLE COMPLEX :: alpha, beta
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: err_norm
   INTEGER :: i
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
        &            nlsym, nlherm, nlpos
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8             ! Number oh intevals in x
   nidbas = 3         ! Degree of splines
   ngauss = 4         ! Number of Gauss points/interval
   nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
   mmode = 1          ! Fourier mode
   alpha = (1.0, 1.0) ! Complex "diffusion"
   beta  = 1.0
   npt = 100
   nlsym = .TRUE.    ! Is matrice symmetric
   nlherm = .FALSE.  ! Is matrice hermitian
   nlpos = .TRUE.    ! and positive definite ?
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   dx = 2.d0*pi/REAL(nx,8)
   xgrid = (/ (i*dx,i=0,nx) /)
   IF( .NOT. nlequid ) THEN
      CALL meshdist(mmode, xgrid)
   END IF
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL putarr(fid, '/xgrid', xgrid)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up periodic spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
   WRITE(*,'(a,l6)') 'nlequid =', nlequid
   nrank = nx       ! Rank of the FE matrix
 !
 !   FE matrix assembly
 !
   CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
   CALL get_dim(splx, dim)
   WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
 !!$  CALL dismat(splx, mat, alpha, beta)
   CALL conmat(splx, mat, coefeq)
 !
 !   RHS assembly
 !  
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(splx, rhs, mmode, alpha, beta)
 !
   CALL putarr(fid, '/rhs', rhs)
 !
 !   Factor and solve
 !
   WRITE(*,'(a/(10i6))') 'iparm', mat%p%iparm
   CALL factor(mat)
   CALL putmat(fid,'/MAT', mat)
   CALL bsolve(mat, rhs, sol)
   CALL putarr(fid, '/sol', sol)
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix       = ',get_count(mat)
   WRITE(*,'(a,i8)')  'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
 !
 !   Compute residue
 !
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
 !===========================================================================
 !              3.0   Check solution
 !
 !   Exact solution
   ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
   dx=2.0d0*pi/REAL(npt,8)
   x = (/ (i*dx, i=0,npt) /)
   solana = COS(mmode*x)
 !
 !   Prolongate solution using periodicity
 !
   ALLOCATE(bcoef(dim))
   bcoef(1:nrank) = sol(1:nrank)
   DO i=nrank+1,dim
      bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
   END DO
 !
 !   Interpolate field
 !
   CALL gridval(splx, x, solcal, 0, bcoef)
 !
   err = ABS(solcal-solana)
   CALL putarr(fid, '/x', x)
   CALL putarr(fid, '/solana', solana)
   CALL putarr(fid, '/solcal', solcal)
   CALL putarr(fid, '/err', err)
 !
 !   Compute discretization error norm by Gauss integration
 !
   err_norm=0.0
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
   DO i=1,nx
      CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
      CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
      solana(1:ngauss) = COS(mmode*xgauss)
      err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
           &          solana(1:ngauss)-solcal(1:ngauss))
      err_norm = err_norm + SUM(wgauss*err(1:ngauss))
   END DO
   err_norm = SQRT(err_norm)
   WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
 !
 !===========================================================================
 !              4.0 Test of getrow/putrow, getcol/putcol and mcopy
 !
   CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
   ALLOCATE(arow(nrank), newsol(nrank))
 !
   DO i=1,nrank
      CALL getrow(mat, i, arow)
      CALL putrow(newmat, i, arow)
   END DO
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a)') 'putrow/getrow ...'
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
   WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
 !
   DO i=1,nrank
      CALL getcol(mat, i, arow)
      CALL putcol(newmat, i, arow)
   END DO
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a)') 'putcol/getcol ...'
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
   WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
 !
   CALL clear_mat(newmat)
   CALL mcopy(mat, newmat)
   WRITE(*,'(/a)') 'mcopy ...'
   newmat%val = (1000.0d0,0.0d0)*newmat%val
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(a)') 'Backsolve the new system'
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
 !
   WRITE(*,'(a)') 'Destroy NEWMAT ...'
   CALL destroy(newmat)
 !
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(/a)') 'Backsolve the old system'
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
 !
   WRITE(*,'(a)') 'Destroy MAT ...'
   CALL destroy(mat)
 !!$!
 !!$  WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!'
 !!$  CALL bsolve(newmat, rhs, newsol)
 !!$  WRITE(*,'(a)') 'Backsolve the new system'
 !!$  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
 !===========================================================================
 !              9.0  Clean up
 !
   DEALLOCATE(x, solcal, solana, err)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(bcoef)
   DEALLOCATE(xgrid)
   DEALLOCATE(rhs, sol)
   DEALLOCATE(arow, newsol)
   CALL destroy_sp(splx)
   CALL closef(fid)  
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE COMPLEX, INTENT(out) :: c(:)
 !
     c(1) = alpha
     idt(1) = 1
     idw(1) = 1
 !
     c(2) = -beta
     idt(2) = 0
     idw(2) = 0
   END SUBROUTINE coefeq
 END PROGRAM main
diff --git a/examples/pde1dp_cmpl_wsmp.f90 b/examples/pde1dp_cmpl_wsmp.f90
index 1760563..3e12386 100644
--- a/examples/pde1dp_cmpl_wsmp.f90
+++ b/examples/pde1dp_cmpl_wsmp.f90
@@ -1,436 +1,436 @@
 !>
 !> @file pde1dp_cmpl_wsmp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde1dp_cmpl_wsmp_mod
   USE bsplines
   USE wsmp_bsplines
   IMPLICIT NONE
 !
 CONTAINS
   SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
 !
   TYPE(spline1d) :: spl
   INTEGER, INTENT(in) :: mmode
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
   DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
 !
   INTEGER :: dim, nrank, nx, nidbas, ngauss
   INTEGER :: i, igauss, it, irow
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   nrank = SIZE(rhs)
   CALL get_dim(spl, dim, nx, nidbas)
 !
   ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   rhs(:) = 0.0d0
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         contrib = wgauss(igauss) * rhseq(xgauss(igauss))
         DO it=0,nidbas
            irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
            rhs(irow) = rhs(irow) + contrib*fun(it,1)
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
 CONTAINS
   DOUBLE COMPLEX FUNCTION rhseq(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: arg
     arg = mmode*x
     rhseq = (mmode**2*alpha-beta)*COS(arg)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat, alpha, beta)
 !
   TYPE(spline1d) :: spl
   TYPE(zwsmp_mat) :: mat
   DOUBLE COMPLEX, INTENT(in) :: alpha, beta
 !
   INTEGER :: dim, nx, nidbas, ngauss
   INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE COMPLEX :: contrib
 !
   INTEGER :: kterms                ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, dim, nx, nidbas)
   ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl, ngauss)
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
   DO i=1,nx
      CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
      DO igauss=1,ngauss
         CALL basfun(xgauss(igauss), spl, fun, i)
         CALL coefeq(xgauss(igauss), idert, iderw, coefs)
         DO iterm=1,kterms
            DO jt=0,nidbas
               DO iw=0,nidbas
                  contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                       &    fun(iw,iderw(iterm)) * wgauss(igauss)
                  irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
                  jcol=MODULO(i+jt-1,nx) + 1
                  CALL updtmat(mat, irow, jcol, contrib)
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(iderw, idert, coefs)
 !
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
 !
     c(1) = alpha
     idt(1) = 1
     idw(1) = 1
 !
     c(2) = -beta
     idt(2) = 0
     idw(2) = 0
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of complex array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE COMPLEX, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2
 !
     sum2 = DOT_PRODUCT(x,x)
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(mmode, x)
 !
 !   Construct a 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     INTEGER, INTENT(in) :: mmode
     DOUBLE PRECISION, INTENT(inout) :: x(0:)
     INTEGER :: nx, nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     nx = SIZE(x)-1
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = 2.0 + COS(mmode*x)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde1dp_cmpl_wsmp_mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !
 PROGRAM main
 !
 !   1D complex PDE with periodic BC
 !
   USE pde1dp_cmpl_wsmp_mod
   USE futils
 !
   IMPLICIT NONE
   TYPE(spline1d) :: splx
   TYPE(zwsmp_mat) :: mat
   TYPE(zwsmp_mat) :: newmat
   INTEGER :: kl, ku, nrank
 !
   CHARACTER(len=128) :: file='pde1dp_cmpl_wsmp.h5'
   INTEGER :: fid
   INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
   LOGICAL :: nlequid
   LOGICAL :: nlsym, nlherm, nlpos
   DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
   DOUBLE PRECISION :: dx
   DOUBLE COMPLEX :: alpha, beta
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
   DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
   DOUBLE PRECISION :: err_norm
   INTEGER :: i
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
        &            nlsym, nlherm, nlpos
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8             ! Number oh intevals in x
   nidbas = 3         ! Degree of splines
   ngauss = 4         ! Number of Gauss points/interval
   nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
   mmode = 1          ! Fourier mode
   alpha = (1.0, 1.0) ! Complex "diffusion"
   beta  = 1.0
   npt = 100
   nlsym = .TRUE.    ! Is matrice symmetric
   nlherm = .FALSE.  ! Is matrice hermitian
   nlpos = .TRUE.    ! and positive definite ?
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   dx = 2.d0*pi/REAL(nx,8)
   xgrid = (/ (i*dx,i=0,nx) /)
   IF( .NOT. nlequid ) THEN
      CALL meshdist(mmode, xgrid)
   END IF
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NIDBAS', nidbas)
   CALL putarr(fid, '/xgrid', xgrid)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up periodic spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
   WRITE(*,'(a,l6)') 'nlequid =', nlequid
   nrank = nx       ! Rank of the FE matrix
 !
 !   FE matrix assembly
 !
   CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
   CALL get_dim(splx, dim)
   WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
   CALL dismat(splx, mat, alpha, beta)
 !
 !   RHS assembly
 !  
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(splx, rhs, mmode, alpha, beta)
 !
   CALL putarr(fid, '/rhs', rhs)
 !
 !   Factor and solve
 !
 !!$  CALL factor(mat, nlmetis=.TRUE.)
   CALL factor(mat)
   CALL putmat(fid,'/MAT', mat)
   CALL bsolve(mat, rhs, sol)
   CALL putarr(fid, '/sol', sol)
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix       = ',get_count(mat)
   WRITE(*,'(a,i8)')  'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
 !
 !   Compute residue
 !
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
 !===========================================================================
 !              3.0   Check solution
 !
 !   Exact solution
   ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
   dx=2.0d0*pi/REAL(npt,8)
   x = (/ (i*dx, i=0,npt) /)
   solana = COS(mmode*x)
 !
 !   Prolongate solution using periodicity
 !
   ALLOCATE(bcoef(dim))
   bcoef(1:nrank) = sol(1:nrank)
   DO i=nrank+1,dim
      bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
   END DO
 !
 !   Interpolate field
 !
   CALL gridval(splx, x, solcal, 0, bcoef)
 !
   err = ABS(solcal-solana)
   CALL putarr(fid, '/x', x)
   CALL putarr(fid, '/solana', solana)
   CALL putarr(fid, '/solcal', solcal)
   CALL putarr(fid, '/err', err)
 !
 !   Compute discretization error norm by Gauss integration
 !
   err_norm=0.0
   ALLOCATE(xgauss(ngauss), wgauss(ngauss))
   DO i=1,nx
      CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
      CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
      solana(1:ngauss) = COS(mmode*xgauss)
      err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
           &          solana(1:ngauss)-solcal(1:ngauss))
      err_norm = err_norm + SUM(wgauss*err(1:ngauss))
   END DO
   err_norm = SQRT(err_norm)
   WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
 !
 !===========================================================================
 !              4.0 Test of getrow/putrow, getcol/putcol and mcopy
 !
   CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
   ALLOCATE(arow(nrank), newsol(nrank))
 !
   DO i=1,nrank
      CALL getrow(mat, i, arow)
      CALL putrow(newmat, i, arow)
   END DO
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a)') 'putrow/getrow ...'
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
   WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
 !
   DO i=1,nrank
      CALL getcol(mat, i, arow)
      CALL putcol(newmat, i, arow)
   END DO
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a)') 'putcol/getcol ...'
   WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
   WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
 !
   CALL clear_mat(newmat)
   CALL mcopy(mat, newmat)
   WRITE(*,'(/a)') 'mcopy ...'
   newmat%val = (1000.0d0,0.0d0)*newmat%val
   CALL factor(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
 !
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(a, i3)') 'Backsolve the system', mat%matid
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
 !
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid
   WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
 !===========================================================================
 !              9.0  Clean up
 !
   DEALLOCATE(x, solcal, solana, err)
   DEALLOCATE(xgauss, wgauss)
   DEALLOCATE(bcoef)
   DEALLOCATE(xgrid)
   DEALLOCATE(rhs, sol)
   DEALLOCATE(arow, newsol)
   CALL destroy(mat)
   CALL destroy(newmat)
   CALL destroy_sp(splx)
   CALL closef(fid)  
 END PROGRAM main
diff --git a/examples/pde1dp_mod.f90 b/examples/pde1dp_mod.f90
index 0a508ac..7c8f6ba 100644
--- a/examples/pde1dp_mod.f90
+++ b/examples/pde1dp_mod.f90
@@ -1,225 +1,225 @@
 !>
 !> @file pde1dp_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde1dp_mod
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   DOUBLE PRECISION, ALLOCATABLE :: bcoef(:)
   TYPE(spline1d), SAVE :: splx
 !  
 CONTAINS 
   SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function defined in FDIST
 !
     DOUBLE PRECISION, INTENT(in) :: c(5)
     INTEGER, INTENT(iN) :: nx
     DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
     INTEGER :: nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !+++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly FE matrix (with periodic BC) mat using spline spl
 !
     TYPE(spline1d), INTENT(in) :: spl
     TYPE(periodic_mat), INTENT(inout) :: mat
     INTEGER :: dim, nx, nidbas, ngauss
     INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
     DOUBLE PRECISION :: contrib
 !
     INTEGER :: kterms                ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl, dim, nx, nidbas)
     ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
 !
 !   Weak form
 !
     kterms = mat%mat%nterms
     ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO i=1,nx
        CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
        DO igauss=1,ngauss
           CALL basfun(xgauss(igauss), spl, fun, i)
           CALL coefeq(xgauss(igauss), idert, iderw, coefs)
           DO iterm=1,kterms
              DO jt=0,nidbas
                 DO iw=0,nidbas
                    contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
                         &    fun(iw,iderw(iterm)) * wgauss(igauss)
                    irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
                    jcol=MODULO(i+jt-1,nx) + 1
                    CALL updtmat(mat, irow, jcol, contrib)
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
     DEALLOCATE(iderw, idert, coefs)
   END SUBROUTINE dismat
 !
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
 !
 ! Mass matrix
 !
     c(1) = 1.0d0
     idt(1) = 0
     idw(1) = 0
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE disrhs(spl, rhs)
 !
 !   Assenbly the RHS using spline spl
 !
     TYPE(spline1d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: dim, nrank, nx, nidbas, ngauss
     INTEGER :: i, igauss, it, irow
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     nrank = SIZE(rhs)
     CALL get_dim(spl, dim, nx, nidbas)
 !
     ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     rhs(:) = 0.0d0
 !
     DO i=1,nx
        CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
        DO igauss=1,ngauss
           CALL basfun(xgauss(igauss), spl, fun, i)
           contrib = wgauss(igauss) * rhseq(xgauss(igauss))
           DO it=0,nidbas
              irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
              rhs(irow) = rhs(irow) + contrib*fun(it,1)
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
   END SUBROUTINE disrhs
 !
 DOUBLE PRECISION FUNCTION rhseq(x)
   DOUBLE PRECISION, INTENT(in) :: x
   DOUBLE PRECISION :: xarr(1), farr(1)
   INTEGER, SAVE :: icall =0
   xarr(1) = x
   IF( icall.EQ.0 ) THEN
      icall = icall+1
      CALL gridval(splx, xarr, farr, 0, bcoef)
   ELSE
      CALL gridval(splx, xarr, farr, 0)
   END IF
   rhseq = farr(1)
 END FUNCTION rhseq
 
 END MODULE pde1dp_mod
diff --git a/examples/pde2d.f90 b/examples/pde2d.f90
index b881711..f46883f 100644
--- a/examples/pde2d.f90
+++ b/examples/pde2d.f90
@@ -1,409 +1,409 @@
 !>
 !> @file pde2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 2d PDE using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
   USE bsplines
   USE matrix
   USE futils
   USE conmat_mod
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform, nlconmat
   INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   TYPE(spline2d) :: splxy
   TYPE(gbmat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   INTEGER :: nits=500
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline2d), INTENT(in) :: spl
        TYPE(gbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(mbess, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: mbess
        TYPE(spline2d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
      SUBROUTINE ibcmat(mat, ny)
        USE matrix
        TYPE(gbmat), INTENT(inout) :: mat
        INTEGER, INTENT(in) :: ny
      END SUBROUTINE ibcmat
      SUBROUTINE ibcrhs(rhs, ny)
        DOUBLE PRECISION, INTENT(inout) :: rhs(:)
        INTEGER, INTENT(in) :: ny
      END SUBROUTINE ibcrhs
 !!$     SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
 !!$       DOUBLE PRECISION, INTENT(in) :: x, y
 !!$       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
 !!$       DOUBLE PRECISION, INTENT(out) :: c(:)
 !!$     END SUBROUTINE coefeq_poisson
   END INTERFACE
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, &
        &            coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlconmat = .TRUE.   ! Use CONMAT instead of DISMAT
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
 !
   CALL init(kl, ku, nrank, nterms, mat)
   t0 = seconds()
   IF(nlconmat) THEN
      CALL conmat(splxy, mat, coefeq_poisson)
   ELSE
      CALL dismat(splxy, mat)
   END IF
   tmat = seconds() - t0
   CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
   ALLOCATE(arr(nrank))
 !
 !   BC on Matrix
 !
   IF(nrank.LT.100) &
        & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) &
        &   WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:)
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC')
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, rhs, sol)
 !
 !   Backtransform of solution
 !
   sol(1:ny-1) = sol(ny)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
 !
   tsolv = seconds() - t0
   gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x, y
     INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
     c(1) = x        ! 
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.d0/x
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq_poisson
 !
 !+++
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_mumps.f90 b/examples/pde2d_mumps.f90
index 772fce2..8a2e120 100644
--- a/examples/pde2d_mumps.f90
+++ b/examples/pde2d_mumps.f90
@@ -1,937 +1,937 @@
 !>
 !> @file pde2d_mumps.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and MUMPS non-symmetric and symmetric 
 !  matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_mumps_mod
   USE bsplines
   USE mumps_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(mumps_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !
     INTEGER :: istart, iend
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
 !!$    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
 !!$    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
 !
 !   Matrix partition
 !
     istart = mat%istart
     iend   = mat%iend
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 IF( irow.GE.istart .AND. irow.LE.iend) THEN
                    DO it1=0,nidbas1  ! Test function in dir 1
                       igt1 = i+it1
                       DO it2=0,nidbas2  ! Test function in dir 2
                          igt2 = MODULO(j+it2-1, n2) + 1
                          jcol = igt2 + (igt1-1)*n2
 !-------------
                          contrib = 0.0d0
                          DO ig1=1,ng1
                             DO ig2=1,ng2
                                DO iterm=1,kterms
                                   contrib = contrib + &
                                        &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                        &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                        &    coefs(iterm,ig1,ig2) *                 &
                                        &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                        &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                        &    wg1(ig1) * wg2(ig2)
                                END DO
                             END DO
                          END DO
                          CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                       END DO
                    END DO
                 END IF
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(mumps_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     ALLOCATE(zsum(nrank), arr(nrank))
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     IF(mat%nlsym) THEN
        zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     END IF
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     IF( .NOT.mat%nlsym) THEN
        zsum = 0.0d0
        DO j=1,ny
           arr = 0.0d0
           CALL getcol(mat, j, arr)
           zsum(ny:) = zsum(ny:) + arr(ny:)
        END DO
        CALL putcol(mat, ny, zsum)
     END IF
 !
 !   The away operator
 !
     IF( .NOT.mat%nlsym) THEN
        DO j = 1,ny-1
           arr = 0.0d0; arr(j) = 1.0d0
           CALL putcol(mat, j, arr)     
        END DO
     END IF
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
     DEALLOCATE(zsum)
     DEALLOCATE(arr)
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
        ALLOCATE(arr(nrank))
        IF( .NOT.mat%nlsym) THEN
           DO j = nrank, nrank-ny+1, -1
              arr = 0.0d0; arr(j) = 1.0d0
              CALL putcol(mat, j, arr)     
           END DO
        END IF
 !
        DO i = nrank, nrank-ny+1, -1
           arr = 0.0d0; arr(i) = 1.0d0
           CALL putrow(mat, i, arr)     
        END DO
        DEALLOCATE(arr)
 !===========================================================================
 !              9.0  Epilogue
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE write_matrix(lun, mat, comm)
 !
 !   Write the distribute matrix to (single) file
 !
     INCLUDE 'mpif.h'
 !
     INTEGER             :: lun
     TYPE(mumps_mat)     :: mat
     INTEGER, INTENT(in) :: comm
 !
     INTEGER :: nprocs, me, ierr
     INTEGER :: nrank, nnz, nnz_loc, istart, iend, nloc
     INTEGER :: i
     INTEGER, ALLOCATABLE :: displs(:), nlocs(:), cols(:), irow(:)
     DOUBLE PRECISION, ALLOCATABLE :: val(:)
 !
     CALL mpi_comm_size(comm, nprocs, ierr)
     CALL mpi_comm_rank(comm, me, ierr)
 !
     IF(.NOT.ASSOCIATED(mat%val)) THEN
        WRITE(*,'(a)') 'WRITE_MATRIX: MUMPS matrix does not exist!'
        STOP
     END IF
 !
 !    Info on matrix
 !
 !!$    IF(me.EQ.0) THEN
 !!$       s0 = mat%nnz_start-1
 !!$       DO i=mat%istart,mat%iend
 !!$          s=mat%irow(i)-s0
 !!$          e=mat%irow(i+1)-1-s0
 !!$          WRITE(*,'(a,i6,1pe12.3)') 'nnz, Sum(val)', e-s+1, SUM(mat%val(s:e))
 !!$       END DO
 !!$    END IF
 !
     nrank = mat%rank
     nnz_loc = mat%nnz_loc
     nnz = mat%nnz
     istart = mat%istart
     iend = mat%iend
 !
     IF(me.EQ.0) THEN
        WRITE(lun) nrank, nnz
     END IF
 !
 !   Write irow
 !
     nloc = iend-istart+1
     IF (me.EQ.0) THEN
        ALLOCATE(displs(0:nprocs))
        ALLOCATE(nlocs(0:nprocs-1))
        ALLOCATE(irow(nrank+1))
     END IF
     CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr)
     IF(me.EQ.0) THEN
        displs(0) = 0
        DO i=0,nprocs-1
           displs(i+1) = displs(i)+nlocs(i)
        END DO
     END IF
     CALL mpi_gatherv(mat%irow, nloc, MPI_INTEGER, &
          &           irow, nlocs, displs, MPI_INTEGER, 0, comm, ierr)
     IF(me.EQ.0) THEN
        irow(nrank+1) = nnz+1
        WRITE(lun) irow
        DEALLOCATE(irow)
     END IF
 !
 !   Write cols
 !
     nloc = mat%nnz_loc
     IF(me.EQ.0) THEN
        ALLOCATE(cols(nnz))
     END IF
     CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr)
     IF(me.EQ.0) THEN
        displs(0) = 0
        DO i=0,nprocs-1
           displs(i+1) = displs(i)+nlocs(i)
        END DO
     END IF
     CALL mpi_gatherv(mat%cols, nloc, MPI_INTEGER, &
          &           cols, nlocs, displs, MPI_INTEGER, 0, comm, ierr)
     IF(me.EQ.0) THEN
        WRITE(lun) cols
        DEALLOCATE(cols)
     END IF
 !
 !   Write val (Same data partition as "cols"
 !
     IF(me.EQ.0) THEN
        ALLOCATE(val(nnz))
     END IF
     CALL mpi_gatherv(mat%val, nloc, MPI_DOUBLE_PRECISION, &
          &           val, nlocs, displs, MPI_DOUBLE_PRECISION, 0, comm, ierr)
     IF(me.EQ.0) THEN
        WRITE(lun) val
        DEALLOCATE(val)
     END IF
 !
 !  Epilogue
 !
     IF(me.EQ.0) THEN
        DEALLOCATE(displs, nlocs)
     END IF
   END SUBROUTINE write_matrix
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_mumps_mod
 PROGRAM main
   USE pde2d_mumps_mod
   USE futils
 !
   IMPLICIT NONE
   INCLUDE 'mpif.h'
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: debug_mumps=.FALSE.
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(mumps_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_mumps.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlsym, nlpos
   LOGICAL :: nlmetis, nlforce_zero
   LOGICAL :: nlserial
 !
   INTEGER :: ierr, me
   INTEGER(kind=8) :: nzfact
   DOUBLE PRECISION :: mem_loc
 !
   CHARACTER(len=128) :: matfile=''
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym, nlpos,&
        &            nlmetis, nlforce_zero, nlserial, coefx, coefy, matfile, &
        &            debug_mumps
 !===========================================================================
 !              1.0 Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlsym    = .FALSE.  ! Symmetric or unsymmetric matrix
   nlpos    = .TRUE.   ! Positive definite matrix
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlserial = .TRUE.   ! Serial. The solver is duplicated on each process. Otherwise
                       ! the solver matrix is partionned among the processes.
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   matfile = ''        ! Save matrix file to matfile if not empty
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlmetis, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(debug_mumps, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlserial, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   IF(me.EQ.0) THEN
      CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
      CALL attach(fid, '/', 'NX', nx)
      CALL attach(fid, '/', 'NY', ny)
      CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
      CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
      CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
      CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
      CALL attach(fid, '/', 'MBESS', mbess)
   END IF
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   IF(me.EQ.0) WRITE(*,'(a,i8)') 'nrank', nrank
 !
   IF(nlserial) THEN   ! The solver is duplicated
      CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,&
           &    nlsym=nlsym, nlpos=nlpos)
   ELSE  ! The solver is distributed
      CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,&
           &    nlsym=nlsym, nlpos=nlpos, comm_in=MPI_COMM_WORLD)
   END IF
   mat%mumps_par%ICNTL(23) = 400
   IF(me.EQ.0) THEN
      WRITE(*,'(a/(20i6))') 'ICNTL =', mat%mumps_par%ICNTL
   END IF
   WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
        &        mat%iend-mat%istart+1
 !
   CALL dismat(splxy, mat)
 !
 !   BC on Matrix
 !
   CALL ibcmat(mat, ny)
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
   IF(me.EQ.0) THEN
      CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   END IF
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
 
   t0 = seconds()
   CALL to_mat(mat)
   WRITE(*,'(a/(10i6))') 'MInmax IRN_loc', MINVAL(mat%mumps_par%IRN_loc), MAXVAL(mat%mumps_par%IRN_loc)
   WRITE(*,'(a/(10i6))') 'JCN_loc', MINVAL(mat%mumps_par%JCN_loc), MAXVAL(mat%mumps_par%JCN_loc)
   tconv = seconds() -t0
   CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
   IF(me.EQ.0) THEN 
      WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
   END IF
 !
 !   Write Matrix and RHS to file
 !
   IF(LEN_TRIM(matfile).GT.0) THEN 
      IF(me.EQ.0) THEN
         OPEN(99, file=matfile, form='unformatted')
      END IF
      CALL write_matrix(99, mat, MPI_COMM_WORLD)
   END IF
 !
   t0 = seconds()
   CALL reord_mat(mat, nlmetis=nlmetis, debug=debug_mumps)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat, debug=debug_mumps)
   tfact = seconds() - t0
 !
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR')
   IF(me.EQ.0) THEN 
      nzfact = mat%mumps_par%INFOG(29)
      IF(nzfact<0) THEN
         nzfact = -nzfact*1000000
      END IF
      WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',nzfact
      WRITE(*,'(a,f12.2)')  'Number of factorization MFLOPS      = ',&
           &               mat%mumps_par%RINFOG(3)/1.e6
   END IF
   gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9
 !
   CALL bsolve(mat, rhs, sol, debug=debug_mumps)
 !
   IF(LEN_TRIM(matfile).GT.0) THEN
      IF(me.EQ.0) THEN
         WRITE(99) rhs
         WRITE(99) sol
         CLOSE(99)
      END IF
   END IF
 !
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   tsolv = (seconds() - t0)/REAL(nits)
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   IF(me.EQ.0) THEN 
      WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
      CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
   END IF
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   IF(me.EQ.0) THEN
      ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
      DO i=0,nx
         DO j=0,ny
            solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
         END DO
      END DO
      jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
      CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
      WRITE(*,'(/a)') '*** Checking solutions'
      t0 = seconds()
      DO it=1,nits   ! nits iterations for timing
         CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      END DO
      tgrid = (seconds() - t0)/REAL(nits)
      errsol = solana - solcal
      IF( SIZE(bcoef,2) .LE. 10 ) THEN
         CALL prnmat('BCOEF', bcoef)
         CALL prnmat('SOLANA', solana)
         CALL prnmat('SOLCAL', solcal)
         CALL prnmat('ERRSOL', errsol)
      END IF
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
           &    norm2(errsol) / norm2(solana)
      WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
      CALL putarr(fid, '/xgrid', xgrid, 'r')
      CALL putarr(fid, '/ygrid', ygrid, '\theta')
      CALL putarr(fid, '/sol', solcal, 'Solutions')
      CALL putarr(fid, '/solana', solana,'Exact solutions')
      CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
      WRITE(*,'(/a)') '*** Checking gradient'
      DO i=0,nx
         DO j=0,ny
            IF( mbess .EQ. 0 ) THEN
               solana(i,j) = -2.0d0 * xgrid(i)
            ELSE
               solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
                    &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
            END IF
         END DO
      END DO
 !
      jder = (/1,0/)
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      errsol = solana - solcal
      CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
      WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
      DO i=0,nx
         DO j=0,ny
            solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
         END DO
      END DO
 !
      jder = (/0,1/)
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
      errsol = solana - solcal
      WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
      WRITE(*,'(/a)') '---'
      WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
      WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
      WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
      WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
      WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
      WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
      WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
      DEALLOCATE(solcal, solana, errsol)
   END IF
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
   CALL numfact(mat, debug=debug_mumps)
   tfact = seconds()-t0
   gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
      WRITE(*,'(/a)') '---'
      WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
      WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
      WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
      WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
      WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
   END IF
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(bcoef)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   IF(me.EQ.0) CALL closef(fid)
   CALL mpi_finalize(ierr)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 !
   SUBROUTINE minmax_i(k, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER, INTENT(in)          :: k
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr, kmin, kmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
     END IF
   END SUBROUTINE minmax_i
 !
   SUBROUTINE minmax_r(x, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr
     DOUBLE PRECISION ::  xmin, xmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
     END IF
   END SUBROUTINE minmax_r
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_nh.f90 b/examples/pde2d_nh.f90
index ee5a008..98607b9 100644
--- a/examples/pde2d_nh.f90
+++ b/examples/pde2d_nh.f90
@@ -1,684 +1,684 @@
 !>
 !> @file pde2d_nh.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the following 2d PDE using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my),
 !     with BC: f(x=1,y) = cos(y)
 !
 !    Exact solution: f(x,y) = (1-x^2) x^m cos(my) + x*cos(y)
 !
 MODULE pde2d_nh_mod
   USE bsplines
   USE matrix
   IMPLICIT NONE
 !
   LOGICAL :: nlfix
 CONTAINS
   SUBROUTINE dismat(spl, mat)
  !
  !   Assembly of FE matrix mat using spline spl
  !
     TYPE(spline2d), INTENT(in) :: spl
     TYPE(gbmat), INTENT(inout) :: mat
  !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
  !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
  !===========================================================================
  !              1.0 Prologue
  !
  !   Properties of spline space
  !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
  !
  !   Weak form
  !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
  !
     ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1)) ! 
  !
  !   Gauss quadature
  !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
  !===========================================================================
  !              2.0 Assembly loop
  !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
                 DO iterm=1,kterms
                    DO iw1=0,nidbas1  ! Weight function in dir 1
                       igw1 = i+iw1
                       DO iw2=0,nidbas2  ! Weight function in dir 2
                          igw2 = MODULO(j+iw2-1, n2) + 1
                          irow = igw2 + (igw1-1)*n2
                          DO it1=0,nidbas1  ! Test function in dir 1
                             igt1 = i+it1
                             DO it2=0,nidbas2  ! Test function in dir 2
                                igt2 = MODULO(j+it2-1, n2) + 1
                                jcol = igt2 + (igt1-1)*n2
                                contrib = fun1(iw1,iderw(iterm,1)) * &
                                     &    fun2(iw2,iderw(iterm,2)) * &
                                     &    coefs(iterm) *             &
                                     &    fun2(it2,idert(iterm,2)) * &
                                     &    fun1(it1,idert(iterm,1)) * &
                                     &    wg1(ig1) * wg2(ig2)
                                CALL updtmat(mat, irow, jcol, contrib)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
  !===========================================================================
  !              9.0  Epilogue
  !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
  !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     IMPLICIT NONE
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, spl)
 !
 !   Apply BC on matrix
 !
     IMPLICIT NONE
     TYPE(gbmat), INTENT(inout) :: mat
     TYPE(spline2d) :: spl
     INTEGER :: nx, ndim1, nidbas1
     INTEGER :: ny, ndim2, nidbas2
     INTEGER :: kl, ku, nrank, i, j
     INTEGER :: krow, kcol, jf
     DOUBLE PRECISION :: yg
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:), fun(:,:)
 !===========================================================================
 !              1.0 Prologue
 !
     CALL get_dim(spl%sp1, ndim1, nx, nidbas1)
     CALL get_dim(spl%sp2, ndim2, ny, nidbas2)
 !
     kl = mat%kl
     ku = mat%ku
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
     ALLOCATE(fun(0:nidbas2,1))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        DO j=1,ny+ku
           zsum(j) = zsum(j) + arr(j)
        END DO
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     zsum = 0.0d0
     DO j=1,ny
        arr = 0.0d0
        CALL getcol(mat, j, arr)
        DO i=ny,ny+kl
           zsum(i) = zsum(i) + arr(i)
        END DO
     END DO
     CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
     DO j = 1,ny-1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     i=nx+nidbas1     ! The last spline in X
     DO j=1,ny
        krow=(i-1)*ny+j
        IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN
           yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0
        ELSE
           yg = spl%sp2%knots(j-1)
        END IF
        CALL basfun(yg, spl%sp2, fun, j)
        arr = 0.0d0
        DO jf=0,nidbas2
           kcol=(i-1)*ny + MODULO(jf+j-1,ny)+1
           arr(kcol) = arr(kcol)+fun(jf,1)
        END DO
        CALL putrow(mat, krow, arr)
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
     DEALLOCATE(fun)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, spl)
 !
 !   Apply BC on RHS
 !
     IMPLICIT NONE
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     TYPE(spline2d) :: spl
     INTEGER :: nx, ndim1, nidbas1
     INTEGER :: ny, ndim2, nidbas2
     INTEGER :: nrank
     INTEGER :: i, j, k
     DOUBLE PRECISION :: xg, yg, zsum
 !===========================================================================
 !              1.0 Prologue
 !
     CALL get_dim(spl%sp1, ndim1, nx, nidbas1)
     CALL get_dim(spl%sp2, ndim2, ny, nidbas2)
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     i = nx+nidbas1          ! The last spline index on x
     xg = spl%sp1%knots(nx)  ! Right boundary radial coordinate
     DO j=1,ny
        k = (i-1)*ny + j
        IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN
           yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0
        ELSE
           yg = spl%sp2%knots(j-1)
        END IF
        rhs(k) = xg*COS(yg)
     END DO
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     IMPLICIT NONE
     DOUBLE PRECISION, INTENT(in) :: c(5)
     INTEGER, INTENT(iN) :: nx
     DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
     INTEGER :: nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_nh_mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 PROGRAM main
 !
   USE pde2d_nh_mod
   USE bsplines
   USE matrix
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   TYPE(spline2d) :: splxy
   TYPE(gbmat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_nh.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   INTEGER :: nits=500
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, coefx, coefy, nlfix
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   nlfix = .TRUE.      ! Fix or not for even nidbas2
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
 !
   CALL init(kl, ku, nrank, nterms, mat)
   CALL dismat(splxy, mat)
   CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
   ALLOCATE(arr(nrank))
 !
 !   BC on Matrix
 !
   IF(nrank.LT.100) &
        & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:)
   CALL ibcmat(mat, splxy)
   tmat = seconds() - t0
   IF(nrank.LT.100) &
        &   WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:)
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, splxy)
 
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC')
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, rhs, sol)
 !
 !   Backtransform of solution
 !
   sol(1:ny-1) = sol(ny)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
 !
   tsolv = seconds() - t0
   gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) &
              &        + xgrid(i)*COS(ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) &
              &          + COS(ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   CALL putarr(fid, '/errors_x', errsol, 'Errors in d/dx')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
              &        SIN(mbess*ygrid(j)) &
              &        -xgrid(i)*SIN(ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   CALL putarr(fid, '/errors_y', errsol, 'Errors in d/dy')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 !+++
diff --git a/examples/pde2d_pardiso.f90 b/examples/pde2d_pardiso.f90
index 276f727..53619d3 100644
--- a/examples/pde2d_pardiso.f90
+++ b/examples/pde2d_pardiso.f90
@@ -1,741 +1,741 @@
 !>
 !> @file pde2d_pardiso.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and PARDISO non-symmetric matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_pardiso_mod
   USE bsplines
   USE pardiso_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(pardiso_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 DO it1=0,nidbas1  ! Test function in dir 1
                    igt1 = i+it1
                    DO it2=0,nidbas2  ! Test function in dir 2
                       igt2 = MODULO(j+it2-1, n2) + 1
                       jcol = igt2 + (igt1-1)*n2
 !-------------
                       contrib = 0.0d0
                       DO ig1=1,ng1
                          DO ig2=1,ng2
                             DO iterm=1,kterms
                                contrib = contrib + &
                                     &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                     &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                     &    coefs(iterm,ig1,ig2) *                 &
                                     &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                     &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                     &    wg1(ig1) * wg2(ig2)
                             END DO
                          END DO
                       END DO
                       CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(pardiso_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     zsum = 0.0d0
     DO j=1,ny
        arr = 0.0d0
        CALL getcol(mat, j, arr)
        zsum(ny:) = zsum(ny:) + arr(ny:)
     END DO
     CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
     DO j = 1,ny-1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x, y
     INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
     c(1) = x        ! 
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.d0/x
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq_poisson
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_pardiso_mod
 PROGRAM main
   USE pde2d_pardiso_mod
   USE futils
   USE conmat_mod
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform, nlconmat
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(pardiso_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_pardiso.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlmetis, nlforce_zero
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
        &            nlforce_zero, nlconmat, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   nlconmat = .TRUE.   ! Use CONMAT instead of DISMAT
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   WRITE(*,'(a,i8)') 'nrank', nrank
 !
   CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
   t0 = seconds()
   IF(nlconmat) THEN
      CALL conmat(splxy, mat, coefeq_poisson)
   ELSE
      CALL dismat(splxy, mat)
   END IF
   tmat = seconds() - t0
   ALLOCATE(arr(nrank))
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
   END IF
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
      WRITE(*,'(a)') 'Last rows'
      DO i=nrank-ny,nrank
         CALL getrow(mat, i, arr)
         WRITE(*,'(10(1pe12.3))') arr
      END DO
   END IF
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL to_mat(mat)
   tconv = seconds() -t0
   WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
   WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
 !
   t0 = seconds()
   CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
   CALL putmat(fid, '/MAT', mat)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat, debug=.FALSE.)
   tfact = seconds() - t0
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
   WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
   CALL bsolve(mat, rhs, sol, debug=.FALSE.)
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
   tsolv = (seconds() - t0)/REAL(nits)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
   WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   WRITE(*,'(/a)') 'Recompute the solver ...'
   CALL clear_mat(mat)
   t0 = seconds()
   IF(nlconmat) THEN
      CALL conmat(splxy, mat, coefeq_poisson)
   ELSE
      CALL dismat(splxy, mat)
   END IF
   tmat = seconds()-t0
   CALL ibcmat(mat, ny)
 !
   t0 = seconds()
   CALL numfact(mat, debug=.FALSE.)
   tfact = seconds()-t0
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_pb.f90 b/examples/pde2d_pb.f90
index e764273..bd18342 100644
--- a/examples/pde2d_pb.f90
+++ b/examples/pde2d_pb.f90
@@ -1,696 +1,696 @@
 !>
 !> @file pde2d_pb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 2d PDE using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
   USE bsplines
   USE matrix
   USE conmat_mod
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform, nlconmat
   INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   TYPE(spline2d) :: splxy
   TYPE(pbmat) :: mat
 !
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   INTEGER :: nits=500
 !
   INTERFACE
      SUBROUTINE dismat(spl, mat)
        USE bsplines
        USE matrix
        TYPE(spline2d), INTENT(in) :: spl
        TYPE(pbmat), INTENT(inout) :: mat
      END SUBROUTINE dismat
      SUBROUTINE disrhs(mbess, spl, rhs)
        USE bsplines
        INTEGER, INTENT(in) :: mbess
        TYPE(spline2d), INTENT(in) :: spl
        DOUBLE PRECISION, INTENT(out) :: rhs(:)
      END SUBROUTINE disrhs
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
      SUBROUTINE ibcmat(mat, ny)
        USE matrix
        TYPE(pbmat), INTENT(inout) :: mat
        INTEGER, INTENT(in) :: ny
      END SUBROUTINE ibcmat
      SUBROUTINE ibcrhs(rhs, ny)
        DOUBLE PRECISION, INTENT(inout) :: rhs(:)
        INTEGER, INTENT(in) :: ny
      END SUBROUTINE ibcrhs
      SUBROUTINE coefeq(x, y, idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x, y
        INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
        DOUBLE PRECISION, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
   END INTERFACE
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, &
        &            coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlconmat = .TRUE.   ! Use CONMAT instead of DISMAT
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
 !!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
 !
   CALL init(ku, nrank, nterms, mat)
   t0 = seconds()
   IF(nlconmat) THEN
      CALL conmat(splxy, mat, coefeq)
   ELSE
      CALL dismat(splxy, mat)
   END IF
   tmat = seconds() - t0
   ALLOCATE(arr(nrank))
 !
 !   BC on Matrix
 !
   IF(nrank.LT.100) &
        &  WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(ku+1,:)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) &
        &    WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(ku+1,:)
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, rhs, sol)
 !
 !   Backtransform of solution
 !
   sol(1:ny-1) = sol(ny)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
 !
   tsolv = seconds() - t0
   gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM at first call to gridval
   IF(nlppform) THEN
      CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
   END IF
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
 !===========================================================================
 !
 CONTAINS
   SUBROUTINE prntmat(str, a)
     DOUBLE PRECISION, DIMENSION(:,:) :: a
     CHARACTER(len=*) :: str
     INTEGER :: i
     WRITE(*,'(a)') TRIM(str)
     DO i=1,SIZE(a,1)
        WRITE(*,'(10f8.1)') a(i,:)
     END DO
   END SUBROUTINE prntmat
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
 SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
   USE bsplines
   USE matrix
   IMPLICIT NONE
   TYPE(spline2d), INTENT(in) :: spl
   TYPE(pbmat), INTENT(inout) :: mat
 !
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: n2, nidbas2, ndim2, ng2
   INTEGER :: i, j, ig1, ig2
   INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
   DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
   DOUBLE PRECISION:: contrib
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
   INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
   WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
   WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl%sp1, ng1)
   CALL get_gauss(spl%sp2, ng2)
   ALLOCATE(xg1(ng1), wg1(ng1))
   ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
   ALLOCATE(coefs(kterms,ng1,ng2))
 !
   ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
   ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
   ALLOCATE(left1(ng1))
   ALLOCATE(left2(ng2))
   DO i=1,n1
      CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
      left1 = i
      CALL basfun(xg1, spl%sp1, fun1, left1)
      DO j=1,n2
         CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
         left2 = j
         CALL basfun(xg2, spl%sp2, fun2, left2)
 !
         DO ig1=1,ng1
            DO ig2=1,ng2
               CALL coefeq(xg1(ig1), xg2(ig2), &
                    &      idert(:,:,ig1,ig2), &
                    &      iderw(:,:,ig1,ig2), &
                    &      coefs(:,ig1,ig2))
            END DO
         END DO
 !
         DO iw1=0,nidbas1  ! Weight function in dir 1
            igw1 = i+iw1
            DO iw2=0,nidbas2  ! Weight function in dir 2
               igw2 = MODULO(j+iw2-1, n2) + 1
               irow = igw2 + (igw1-1)*n2
               DO it1=0,nidbas1  ! Test function in dir 1
                  igt1 = i+it1
                  DO it2=0,nidbas2  ! Test function in dir 2
                     igt2 = MODULO(j+it2-1, n2) + 1
                     jcol = igt2 + (igt1-1)*n2
 !-------------
                     contrib = 0.0d0
                     DO ig1=1,ng1
                        DO ig2=1,ng2
                           DO iterm=1,kterms
                              contrib = contrib + &
                                   &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                   &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                   &    coefs(iterm,ig1,ig2) *                 &
                                   &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                   &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                   &    wg1(ig1) * wg2(ig2)
                           END DO
                        END DO
                     END DO
                     CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                  END DO
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xg1, wg1, fun1)
   DEALLOCATE(xg2, wg2, fun2)
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(left1,left2)
 !
 CONTAINS
   SUBROUTINE coefeq(x, y, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x, y
     INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
     c(1) = x        ! 
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.d0/x
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq
 END SUBROUTINE dismat
 
 SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER, INTENT(in) :: mbess
   TYPE(spline2d), INTENT(in) :: spl
   DOUBLE PRECISION, INTENT(out) :: rhs(:)
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: n2, nidbas2, ndim2, ng2
   INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
   DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
   DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
   ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
   ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
   CALL get_gauss(spl%sp1, ng1)
   CALL get_gauss(spl%sp2, ng2)
   WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
   ALLOCATE(xg1(ng1), wg1(ng1))
   ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
   nrank = SIZE(rhs)
   rhs(1:nrank) = 0.0d0
 !
   DO i=1,n1
      CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
      DO ig1=1,ng1
         CALL basfun(xg1(ig1), spl%sp1, fun1, i)
         DO j=1,n2
            CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
            DO ig2=1,ng2
               CALL basfun(xg2(ig2), spl%sp2, fun2, j)
               contrib = wg1(ig1)*wg2(ig2) * &
                    &    rhseq(xg1(ig1),xg2(ig2), mbess)
               DO k1=0,nidbas1
                  i1 = i+k1
                  DO k2=0,nidbas2
                     j2 = MODULO(j+k2-1,n2) + 1
                     ij = j2 + (i1-1)*n2
                     rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                  END DO
               END DO
            END DO
         END DO
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xg1, wg1, fun1)
   DEALLOCATE(xg2, wg2, fun2)
 !
 CONTAINS
   DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
     DOUBLE PRECISION, INTENT(in) :: x1, x2
     INTEGER, INTENT(in) :: m
     rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
   END FUNCTION rhseq
 END SUBROUTINE disrhs
 
 SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
   USE matrix
   IMPLICIT NONE
   TYPE(pbmat), INTENT(inout) :: mat
   INTEGER, INTENT(in) :: ny
   INTEGER :: kl, ku, nrank, i, j
   DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
   INTEGER :: i0, ii
   INTEGER :: i0_arr(ny)
 !===========================================================================
 !              1.0 Prologue
 !
 
   ku = mat%ku
   kl = ku
   nrank = mat%rank
   ALLOCATE(zsum(nrank), arr(nrank))
 !
   i0 = nrank - ku
   WRITE(*,'(a,i6)') 'Estimated i0', i0
   DO i=1,ny
      CALL getcol(mat, nrank-ny+i, arr)
      DO ii=1,nrank
         i0_arr(i)=ii
         IF(arr(ii) .NE. 0.0d0) EXIT
      END DO
   END DO
 !!$  WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr
 !
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
   zsum = 0.0d0
   DO i=1,ny
      arr = 0.0d0
      CALL getrow(mat, i, arr)
      DO j=1,ny+ku
         zsum(j) = zsum(j) + arr(j)
      END DO
   END DO
 !
   zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
   CALL putrow(mat, ny, zsum)
 !
 !   The away operator
 !
   DO i = 1,ny-1
      arr = 0.0d0; arr(i) = 1.0d0
      CALL putrow(mat, i, arr)     
   END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
   DO i = nrank, nrank-ny+1, -1
      CALL getcol(mat, i, arr)
      arr = 0.0d0; arr(i) = 1.0d0
      CALL putrow(mat, i, arr)     
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(zsum, arr)
 !
 END SUBROUTINE ibcmat
 !+++
 SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(inout) :: rhs(:)
   INTEGER, INTENT(in) :: ny
   INTEGER :: nrank
   DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
   nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
   zsum = SUM(rhs(1:ny))
   rhs(ny) = zsum
   rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
   rhs(nrank-ny+1:nrank) = 0.0d0
 END SUBROUTINE ibcrhs
 !++++
 SUBROUTINE coefeq(x, y, idt, idw, c)
   DOUBLE PRECISION, INTENT(in) :: x, y
   INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
   DOUBLE PRECISION, INTENT(out) :: c(:)
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
   c(1) = x        ! 
   idt(1,1) = 1
   idt(1,2) = 0
   idw(1,1) = 1
   idw(1,2) = 0
 !
   c(2) = 1.d0/x
   idt(2,1) = 0
   idt(2,2) = 1
   idw(2,1) = 0
   idw(2,2) = 1
 END SUBROUTINE coefeq
diff --git a/examples/pde2d_petsc.f90 b/examples/pde2d_petsc.f90
index 4378b3f..54cd8db 100644
--- a/examples/pde2d_petsc.f90
+++ b/examples/pde2d_petsc.f90
@@ -1,795 +1,795 @@
 !>
 !> @file pde2d_petsc.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and PETSC matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_petsc_mod
   USE bsplines
   USE petsc_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)     :: spl
     TYPE(petsc_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !
     INTEGER :: istart, iend
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
 !!$    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
 !!$    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
 !
 !   Matrix partition
 !
     istart = mat%istart
     iend   = mat%iend
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 IF( irow.GE.istart .AND. irow.LE.iend) THEN
                    DO it1=0,nidbas1  ! Test function in dir 1
                       igt1 = i+it1
                       DO it2=0,nidbas2  ! Test function in dir 2
                          igt2 = MODULO(j+it2-1, n2) + 1
                          jcol = igt2 + (igt1-1)*n2
 !-------------
                          contrib = 0.0d0
                          DO ig1=1,ng1
                             DO ig2=1,ng2
                                DO iterm=1,kterms
                                   contrib = contrib + &
                                        &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                        &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                        &    coefs(iterm,ig1,ig2) *                 &
                                        &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                        &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                        &    wg1(ig1) * wg2(ig2)
                                END DO
                             END DO
                          END DO
                          CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                       END DO
                    END DO
                 END IF
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(petsc_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     ALLOCATE(zsum(nrank), arr(nrank))
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     IF(mat%nlsym) THEN
        zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     END IF
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     IF( .NOT.mat%nlsym) THEN
        zsum = 0.0d0
        DO j=1,ny
           arr = 0.0d0
           CALL getcol(mat, j, arr)
           zsum(ny:) = zsum(ny:) + arr(ny:)
        END DO
        CALL putcol(mat, ny, zsum)
     END IF
 !
 !   The away operator
 !
     IF( .NOT.mat%nlsym) THEN
        DO j = 1,ny-1
           arr = 0.0d0; arr(j) = 1.0d0
           CALL putcol(mat, j, arr)     
        END DO
     END IF
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
     DEALLOCATE(zsum)
     DEALLOCATE(arr)
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
        ALLOCATE(arr(nrank))
        IF( .NOT.mat%nlsym) THEN
           DO j = nrank, nrank-ny+1, -1
              arr = 0.0d0; arr(j) = 1.0d0
              CALL putcol(mat, j, arr)     
           END DO
        END IF
 !
        DO i = nrank, nrank-ny+1, -1
           arr = 0.0d0; arr(i) = 1.0d0
           CALL putrow(mat, i, arr)     
        END DO
        DEALLOCATE(arr)
 !===========================================================================
 !              9.0  Epilogue
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_petsc_mod
 PROGRAM main
   USE pde2d_petsc_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   INTEGER :: nitmax=10000, nits, nits0, ntrials=0
   DOUBLE PRECISION :: rtol=1.e-9
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   DOUBLE PRECISION, ALLOCATABLE :: row_sum(:), row(:)
   TYPE(spline2d) :: splxy
   TYPE(petsc_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_petsc.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: mem, dopla
   DOUBLE PRECISION :: t0, tmat, tsolv, tsolv0, tgrid, gflops1, gflops2
   LOGICAL :: nlsym
   LOGICAL :: nlforce_zero
 !
   INTEGER :: ierr, me
   INTEGER(kind=8) :: nzfact
   INTEGER :: nnz_loc, nnz
   DOUBLE PRECISION :: mem_loc, mem_min, mem_max
 !
   CHARACTER(len=128) :: matfile='mat.dat'
   logical :: file_exist
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym,&
        &            nlforce_zero, coefx, coefy, nitmax, rtol, ntrials, &
        &            matfile
 !===========================================================================
 !              1.0 Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlsym    = .FALSE.  ! Symmetric or unsymmetric matrix
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   nitmax = 10000      ! Max number ofviterations
   rtol = 1.e-9        ! Relative tolerance
   ntrials = 0         ! Run ntrials solution steps after setup
   matfile = ''        ! Save matrix file to matfile if not empty
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ntrials, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   IF(me.EQ.0) THEN
      CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
      CALL attach(fid, '/', 'NX', nx)
      CALL attach(fid, '/', 'NY', ny)
      CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
      CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
      CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
      CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
      CALL attach(fid, '/', 'MBESS', mbess)
   END IF
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = mpi_wtime()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   IF(me.EQ.0) WRITE(*,'(a,i12 )') 'nrank', nrank
 !
   CALL init(nrank, nterms, mat, comm=MPI_COMM_WORLD)
 !
   INQUIRE(file=TRIM(matfile), exist=file_exist)
   IF( file_exist ) THEN
      t0 = mpi_wtime()
      CALL load_mat(mat, matfile)
      tmat = mpi_wtime()-t0
      if(me.eq.0) WRITE(*,'(a,1pe12.3)')    'Mat read time (s) ', tmat
   ELSE
      t0 = mpi_wtime()
      CALL dismat(splxy, mat)
      CALL ibcmat(mat, ny)
 !
 !!$     ALLOCATE(row_sum(mat%istart:mat%iend))
 !!$     ALLOCATE(row(mat%rank))
 !!$     DO i=mat%istart,mat%iend
 !!$        row = 0.0d0
 !!$        CALL getrow(mat, i, row)
 !!$        row_sum(i) = SUM(row)        
 !!$     END DO
 !!$     WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum', row_sum
 !
      CALL to_mat(mat)
 !
 !!$     DO i=mat%istart,mat%iend
 !!$        row = 0.0d0
 !!$        CALL getrow(mat, i, row)
 !!$        row_sum(i) = SUM(row)        
 !!$     END DO
 !!$     WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum(after)', row_sum
      CALL save_mat(mat, matfile)
      tmat = mpi_wtime() - t0
      IF(me.EQ.0) WRITE(*,'(a,1pe12.3)')    'Mat construction time (s) ', tmat
   END IF
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2i16)') 'Mat rank, nnz', mat%rank, mat%nnz
   END IF
 !
 !   RHS assembly
 !
   t0=mpi_wtime()
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
   IF(me.EQ.0) THEN
      CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   END IF
   IF(me.EQ.0)  WRITE(*,'(a,1pe12.3)') 'RHS construction time (s) ', mpi_wtime()-t0
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
 
   CALL minmax_i8(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
   IF(me.EQ.0) THEN 
      WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
   END IF
 !
   t0 = mpi_wtime()
   CALL bsolve(mat, rhs, sol, rtol, nitmax, nits0)
   tsolv0 = mpi_wtime() - t0
   IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve+setup time(s) and nits   ', tsolv0, nits0
 !
   IF(ntrials .GT. 0) THEN
      t0 = mpi_wtime()
      DO it=1,ntrials   ! ntrials iterations for timing
         sol = 0.0d0
         CALL bsolve(mat, rhs, sol, rtol, nitmax, nits)
         sol(1:ny-1) = sol(ny)
      END DO
      tsolv = (mpi_wtime() - t0)/REAL(ntrials)
      IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve time(s) and nits         ', tsolv, nits
   END IF
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
 !
   CALL destroy(mat)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after setting bcoef')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   IF(me.EQ.0) THEN
      ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
      DO i=0,nx
         DO j=0,ny
            solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
         END DO
      END DO
      jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
      CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
      WRITE(*,'(/a)') '*** Checking solutions'
      errsol = solana - solcal
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
           &    norm2(errsol) / norm2(solana)
 !
      CALL putarr(fid, '/xgrid', xgrid, 'r')
      CALL putarr(fid, '/ygrid', ygrid, '\theta')
      CALL putarr(fid, '/sol', solcal, 'Solutions')
      CALL putarr(fid, '/solana', solana,'Exact solutions')
      CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
      WRITE(*,'(/a)') '*** Checking gradient'
      DO i=0,nx
         DO j=0,ny
            IF( mbess .EQ. 0 ) THEN
               solana(i,j) = -2.0d0 * xgrid(i)
            ELSE
               solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
                    &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
            END IF
         END DO
      END DO
 !
      jder = (/1,0/)
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      errsol = solana - solcal
      CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
      WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
      DO i=0,nx
         DO j=0,ny
            solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
         END DO
      END DO
 !
      jder = (/0,1/)
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
      errsol = solana - solcal
      WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
      DEALLOCATE(solcal, solana, errsol)
   END IF
 !!$!===========================================================================
 !!$!              5.0 Clear the matrix and recompute
 !!$!
 !!$  IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
 !!$  t0 = mpi_wtime()()
 !!$  CALL clear_mat(mat)
 !!$  CALL dismat(splxy, mat)
 !!$  CALL ibcmat(mat, ny)
 !!$  tmat = mpi_wtime()()-t0
 !!$!
 !!$  t0 = mpi_wtime()()
 !!$  CALL numfact(mat, debug=.FALSE.)
 !!$  tfact = mpi_wtime()()-t0
 !!$  gflops1 = mat%petsc_par%RINFOG(3) / tfact / 1.d9
 !!$!
 !!$  t0 = mpi_wtime()()
 !!$  ALLOCATE(newsol(nrank))
 !!$  CALL bsolve(mat, rhs, newsol)
 !!$  newsol(1:ny-1) = newsol(ny)
 !!$  tsolv = mpi_wtime()()-t0
 !!$!
 !!$  IF(me.EQ.0) THEN
 !!$     WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
 !!$     WRITE(*,'(/a)') '---'
 !!$     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
 !!$     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
 !!$     WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
 !!$     WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
 !!$     WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !!$  END IF
 !!$!
 !!$  DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(bcoef)
 !!$  CALL destroy(mat)
   CALL destroy_sp(splxy)
 !
   IF(me.EQ.0) CALL closef(fid)
   CALL mpi_finalize(ierr)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !
   SUBROUTINE minmax_i(k, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER, INTENT(in)          :: k
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr, kmin, kmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
     END IF
   END SUBROUTINE minmax_i
 !
   SUBROUTINE minmax_i8(k, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER(8), INTENT(in)          :: k
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr
     INTEGER(8) :: kmin, kmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(k, kmin, 1, MPI_INTEGER8, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(k, kmax, 1, MPI_INTEGER8, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
     END IF
   END SUBROUTINE minmax_i8
 !!
   SUBROUTINE minmax_r(x, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr
     DOUBLE PRECISION ::  xmin, xmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
     END IF
   END SUBROUTINE minmax_r
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_pwsmp.f90 b/examples/pde2d_pwsmp.f90
index 1cd7592..7e001a3 100644
--- a/examples/pde2d_pwsmp.f90
+++ b/examples/pde2d_pwsmp.f90
@@ -1,776 +1,776 @@
 !>
 !> @file pde2d_pwsmp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and WSMP non-symmetric matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_pwsmp_mod
   USE bsplines
   USE pwsmp_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(wsmp_mat), INTENT(inout)    :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !
     INTEGER :: istart, iend
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !
 !   Matrix partition
 !
     istart = mat%istart
     iend   = mat%iend
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 IF( irow.GE.istart .AND. irow.LE.iend) THEN
                    DO it1=0,nidbas1  ! Test function in dir 1
                       igt1 = i+it1
                       DO it2=0,nidbas2  ! Test function in dir 2
                          igt2 = MODULO(j+it2-1, n2) + 1
                          jcol = igt2 + (igt1-1)*n2
 !-------------
                          contrib = 0.0d0
                          DO ig1=1,ng1
                             DO ig2=1,ng2
                                DO iterm=1,kterms
                                   contrib = contrib + &
                                        &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                        &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                        &    coefs(iterm,ig1,ig2) *                 &
                                        &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                        &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                        &    wg1(ig1) * wg2(ig2)
                                END DO
                             END DO
                          END DO
                          CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                       END DO
                    END DO
                 END IF
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     IF( .NOT.mat%nlsym) THEN
        zsum = 0.0d0
        DO j=1,ny
           arr = 0.0d0
           CALL getcol(mat, j, arr)
           zsum(ny:) = zsum(ny:) + arr(ny:)
        END DO
        CALL putcol(mat, ny, zsum)
     END IF
 !
 !   The away operator
 !
     IF( .NOT.mat%nlsym) THEN
        DO j = 1,ny-1
           arr = 0.0d0; arr(j) = 1.0d0
           CALL putcol(mat, j, arr)     
        END DO
     END IF
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_pwsmp_mod
 PROGRAM main
   USE pde2d_pwsmp_mod
   USE futils
 !
   IMPLICIT NONE
   INCLUDE 'mpif.h'
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(wsmp_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_wsmp.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlsym, nlforce_zero
 !
   INTEGER :: ierr, me, nprocs
   DOUBLE PRECISION :: mem_loc, mem_min, mem_max
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
        &            nlsym, nlforce_zero, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlsym = .TRUE.      ! Symmetric matrix or not
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   if(me.eq.0) then
      CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
      CALL attach(fid, '/', 'NX', nx)
      CALL attach(fid, '/', 'NY', ny)
      CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
      CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
      CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
      CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
      CALL attach(fid, '/', 'MBESS', mbess)
   end if
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   IF(me.EQ.0) THEN
      WRITE(*,'(a,i8,a,i4)') 'nrank =', nrank, '   nprocs =', nprocs
   END IF
 !
   CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero, nlsym=nlsym, &
        &    comm_in=MPI_COMM_WORLD)
   WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
        &        mat%iend-mat%istart+1
 !
   CALL dismat(splxy, mat)
 !
 !   BC on Matrix
 !
   CALL ibcmat(mat, ny)
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
   IF(me.EQ.0) THEN
      CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   END IF
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL to_mat(mat)
   tconv = seconds() -t0
   CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
   IF(me.EQ.0) THEN 
      WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
   END IF
 !
   t0 = seconds()
   CALL reord_mat(mat)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat)
   tfact = seconds() - t0
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR')
 ! 
   IF(me.EQ.0) THEN
      WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
      WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
   END IF
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
   CALL bsolve(mat, rhs, sol)
 !
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   tsolv = (seconds() - t0)/REAL(nits)
 !
   mem_loc = mem()
   CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
      CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
   END IF
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method (only on proc 0)
 !
   IF(me.EQ.0) THEN
      ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
      DO i=0,nx
         DO j=0,ny
            solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
         END DO
      END DO
      jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
      CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
      WRITE(*,'(/a)') '*** Checking solutions'
      t0 = seconds()
      DO it=1,nits   ! nits iterations for timing
         CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      END DO
      tgrid = (seconds() - t0)/REAL(nits)
      errsol = solana - solcal
      IF( SIZE(bcoef,2) .LE. 10 ) THEN
         CALL prnmat('BCOEF', bcoef)
         CALL prnmat('SOLANA', solana)
         CALL prnmat('SOLCAL', solcal)
         CALL prnmat('ERRSOL', errsol)
      END IF
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
           &    norm2(errsol) / norm2(solana)
      WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
      CALL putarr(fid, '/xgrid', xgrid, 'r')
      CALL putarr(fid, '/ygrid', ygrid, '\theta')
      CALL putarr(fid, '/sol', solcal, 'Solutions')
      CALL putarr(fid, '/solana', solana,'Exact solutions')
      CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
      WRITE(*,'(/a)') '*** Checking gradient'
      DO i=0,nx
         DO j=0,ny
            IF( mbess .EQ. 0 ) THEN
               solana(i,j) = -2.0d0 * xgrid(i)
            ELSE
               solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
                    &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
            END IF
         END DO
      END DO
 !
      jder = (/1,0/)
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      errsol = solana - solcal
      CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
      WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
      DO i=0,nx
         DO j=0,ny
            solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
         END DO
      END DO
 !
      jder = (/0,1/)
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
      CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
      errsol = solana - solcal
      WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
      WRITE(*,'(/a)') '---'
      WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
      WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
      WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
      WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
      WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
      WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
      WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
      DEALLOCATE(solcal)
      DEALLOCATE(solana)
      DEALLOCATE(errsol)
   END IF
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
 !!$  CALL numfact(mat)
   CALL factor(mat, nlreord=.FALSE.)
   tfact = seconds()-t0
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
      WRITE(*,'(/a)') '---'
      WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
      WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
      WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
      WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
      WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
   END IF
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(bcoef)
   DEALLOCATE(xgrid, rhs, sol)
 9999 CONTINUE
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   IF(me.EQ.0) CALL closef(fid)
   CALL mpi_finalize(ierr)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 !
   SUBROUTINE minmax_i(k, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER, INTENT(in)          :: k
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr, kmin, kmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
     END IF
   END SUBROUTINE minmax_i
 !
   SUBROUTINE minmax_r(x, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr
     DOUBLE PRECISION ::  xmin, xmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
     END IF
   END SUBROUTINE minmax_r
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_sym_pardiso.f90 b/examples/pde2d_sym_pardiso.f90
index 47c7cda..fcd23bf 100644
--- a/examples/pde2d_sym_pardiso.f90
+++ b/examples/pde2d_sym_pardiso.f90
@@ -1,715 +1,715 @@
 !>
 !> @file pde2d_sym_pardiso.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and PARDISO symmetric matrix
 !
 !    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
 !     C(x,y) = 1 + \epsilon x cos(y)
 !     exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_sym_pardiso_mod
   USE bsplines
   USE pardiso_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, epsi, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     DOUBLE PRECISION, INTENT(in)     :: epsi
     TYPE(pardiso_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 DO it1=0,nidbas1  ! Test function in dir 1
                    igt1 = i+it1
                    DO it2=0,nidbas2  ! Test function in dir 2
                       igt2 = MODULO(j+it2-1, n2) + 1
                       jcol = igt2 + (igt1-1)*n2
 !-------------
                       contrib = 0.0d0
                       DO ig1=1,ng1
                          DO ig2=1,ng2
                             DO iterm=1,kterms
                                contrib = contrib + &
                                     &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                     &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                     &    coefs(iterm,ig1,ig2) *                 &
                                     &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                     &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                     &    wg1(ig1) * wg2(ig2)
                             END DO
                          END DO
                       END DO
                       CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
       DOUBLE PRECISION :: zcoef
 !
 ! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
 !      C(x,y) = 1 + epsilon*x*cos(y)
 !
       zcoef = 1.0d0 + epsi*x*COS(y)
 !
       c(1) = x*zcoef        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       c(2) = zcoef/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, epsi, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in)           :: mbess
     DOUBLE PRECISION, INTENT(in)  :: epsi
     TYPE(spline2d), INTENT(in)    :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x, y, m)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(in) :: m
       DOUBLE PRECISION    :: xm
 !
       xm = REAL(m,8)
       rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
            &  epsi*x*( &
            &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
            &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
            &    ))
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(pardiso_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     CALL putrow(mat, ny, zsum)
 !
 !   The away operator
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_sym_pardiso_mod
 PROGRAM main
   USE pde2d_sym_pardiso_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(pardiso_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlmetis, nlforce_zero, nlpos
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, epsi, nlppform, nlmetis, &
        &            nlforce_zero, nlpos, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   nlpos = .TRUE.         ! Matrix is positive definite
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
   CALL attach(fid, '/', 'EPSI', epsi)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   WRITE(*,'(a,i8)') 'nrank', nrank
 !
   t0 = seconds()
   CALL init(nrank, nterms, mat, nlsym=.TRUE.)
   CALL dismat(splxy, epsi, mat)
   ALLOCATE(arr(nrank))
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
   END IF
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
      WRITE(*,'(a)') 'Last rows'
      DO i=nrank-ny,nrank
         CALL getrow(mat, i, arr)
         WRITE(*,'(10(1pe12.3))') arr
      END DO
   END IF
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, epsi, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
 !!$  CALL factor(mat)
 !
 !   The call to "factor" could be split into the
 !   3 following calls
 !
   CALL to_mat(mat)
   tconv = seconds() -t0
   WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
   WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
 !
   t0 = seconds()
   CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
   CALL putmat(fid, '/MAT', mat)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat, debug=.FALSE.)
   tfact = seconds() - t0
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
   WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
   CALL bsolve(mat, rhs, sol, debug=.FALSE.)
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
   tsolv = (seconds() - t0)/REAL(nits)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
   WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, epsi, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
 !!$  CALL numfact(mat, debug=.FALSE.)
   CALL factor(mat, nlreord=.FALSE., debug=.FALSE.)
   tfact = seconds()-t0
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_sym_pardiso_dft.f90 b/examples/pde2d_sym_pardiso_dft.f90
index 42426e6..3b77cf5 100644
--- a/examples/pde2d_sym_pardiso_dft.f90
+++ b/examples/pde2d_sym_pardiso_dft.f90
@@ -1,1034 +1,1034 @@
 !>
 !> @file pde2d_sym_pardiso_dft.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and PARDISO symmetric matrix.
 !  The periodic coordinate y is discrete Fourier transformed.
 !
 !    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
 !     C(x,y) = 1 + \epsilon x cos(y)
 !     exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_sym_pardiso_dft_mod
   USE bsplines
   USE pardiso_bsplines
   IMPLICIT NONE
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, epsi, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     DOUBLE PRECISION, INTENT(in)     :: epsi
     TYPE(zpardiso_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: kmin, kmax, dk
     INTEGER :: i, j, ig1, ig2, kc
     INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
     DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun2(:,:,:), fft_temp(:)
     DOUBLE COMPLEX :: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER :: kcoupl         ! Number of mode couplings
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)')  'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
     WRITE(*,'(a, 5i6)')  'kmin, kmax, dk     =', kmin, kmax, dk
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
 !
 !   Weak form
 !
     kterms = mat%nterms
     kcoupl = SIZE(spl%sp2%dft%mode_couplings)
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
 !
 !   Splines and derivatives at all Gauss points
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1))    ! Spline and 1st derivative
     ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
     ALLOCATE(fft_temp(0:n2-1))           ! Used in coefeq
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
 !
 ! First interval in 2nd (periodic) coordinate
 !
     j = 1
     CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
     left2 = j
     CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
 
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO ig1=1,ng1
           DO ig2=1,ng2
              CALL coefeq(xg1(ig1), xg2(ig2), &
                   &      idert(:,:,ig1,ig2), &
                   &      iderw(:,:,ig1,ig2), &
                   &      coefs(:,:,ig1,ig2))
           END DO
        END DO
 !
        DO iw1=0,nidbas1     ! Weight function in dir 1
           igw1 = i+iw1
           DO it1=0,nidbas1  ! Test function in dir 1
              igt1 = i+it1
              DO mt=kmin,kmax ! Test Fourier mode
                 DO kc=1,kcoupl
                    mw = mt + spl%sp2%dft%mode_couplings(kc)
                    IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
 !-------------
                    contrib = (0.0d0, 0.0d0)
                    DO ig1=1,ng1
                       DO ig2=1,ng2
                          DO iterm=1,kterms
                             contrib = contrib + &
                               &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                               &    ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
                               &    coefs(iterm,kc,ig1,ig2) *                 &
                               &    CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
                               &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                               &    wg1(ig1) * wg2(ig2) /REAL(n2,8)
                          END DO
                       END DO
                    END DO
                    irow = (igw1-1)*dk + (mw-kmin)+1  ! Number first mode m then radial coord.
                    jcol = (igt1-1)*dk + (mt-kmin)+1
                    CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, ft_fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
     DEALLOCATE(fft_temp)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       USE fft
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE COMPLEX, INTENT(out) :: c(:,:)
 !
       DOUBLE PRECISION :: zcoef, dy
       INTEGER :: j, k, kc, kp
 !
 ! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
 !      C(x,y) = 1 + epsilon*x*cos(y)
 !
       dy = spl%sp2%dft%dx
       kc = SIZE(spl%sp2%dft%mode_couplings)
       DO j=0,n2-1
          fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
       END DO
       CALL fourcol(fft_temp,1)
       DO k=1,kc
          kp = spl%sp2%dft%mode_couplings(k)
          IF(kp.LT.0) kp=kp+n2
          c(1,k) = x*fft_temp(kp)
          c(2,k) = fft_temp(kp)/x
       END DO
 !!$      WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
 !!$      WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
 !!$      WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
 !
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, epsi, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in)           :: mbess
     DOUBLE PRECISION, INTENT(in)  :: epsi
     TYPE(spline2d), INTENT(in)    :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x, y, m)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(in)          :: m
 !
       DOUBLE PRECISION :: xm
       xm = REAL(m,8)
       rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
            &  epsi*x*( &
            &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
            &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
            &    ))
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, dft)
 !
 !   Apply BC on matrix
 !
     TYPE(zpardiso_mat), INTENT(inout) :: mat
     TYPE(dftmap), INTENT(in)          :: dft
     INTEGER :: nrank, k, kmin, kmax, dk, i
     DOUBLE COMPLEX :: arr(mat%rank)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     kmin = dft%kmin
     kmax = dft%kmax
     dk   = dft%dk
 !===========================================================================
 !              2.0 BC at the axis
 !
 !   zero for non-zero modes
 !
     DO k=kmin,kmax
        IF(k.NE.0) THEN
           i = k-kmin+1
           arr = 0.0d0; arr(i) = 1.0d0
           CALL putrow(mat, i, arr)
        END IF
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO i = nrank, nrank-dk+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, dft)
 !
 !   Apply BC on RHS
 !
     DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
     TYPE(dftmap), INTENT(in)      :: dft
     INTEGER :: nrank, kmin, kmax, dk, k
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
     kmin = dft%kmin
     kmax = dft%kmax
     dk = dft%dk
 !===========================================================================
 !              2.0 BC at the axis
 !
 !   zero for non-zero modes
 !
     DO k=kmin,kmax
        IF(k.NE.0) rhs(k-kmin+1) = 0.0
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-dk+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE spectrum0(spl, carr, xpt, cspec)
 !
 !  DFT modes at xpt (integration on the first interval)
 !
     DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
     DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
     TYPE(spline2d), INTENT(in)   :: spl
     DOUBLE COMPLEX, INTENT(in)   :: carr(:)
     DOUBLE PRECISION, INTENT(in) :: xpt
     DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
 !
     INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
     INTEGER :: k,  kmin, kmax, dk, kk
     INTEGER :: ng2, ig2
     INTEGER, ALLOCATABLE :: left2(:)
     DOUBLE PRECISION :: temp(1)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
     DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
 !
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(left2(ng2))
     ALLOCATE(xg2(ng2), wg2(ng2))
     ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
     ALLOCATE(psi(kmin:kmax))
 !
 !  Integration over first interval
 !
     left2 = 1
     CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
     CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
     psi = (0.0d0,0.0d0)
     DO k=kmin,kmax
        DO ig2=1,ng2
           psi(k)  = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
        END DO
     END DO
 !
     ALLOCATE(coefs(dk,ndim1))
     coefs = RESHAPE(carr, SHAPE(coefs))
     temp = xpt
     DO kk=kmin,kmax
        k=kk-kmin+1
        coefs(k,:) = psi(kk)*coefs(k,:)
        CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
     END DO
     cspec = cspec/(2.0d0*pi)
 !
     DEALLOCATE(left2)
     DEALLOCATE(xg2, wg2)
     DEALLOCATE(ft_fun2)
     DEALLOCATE(psi)
     DEALLOCATE(coefs)    
   END SUBROUTINE spectrum0
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
 !
 !  DFT modes at xpt (at the initial ypt0)
 !
     DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
 !
     TYPE(spline2d), INTENT(in)   :: spl
     DOUBLE COMPLEX, INTENT(in)   :: carr(:)
     DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
     DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
 !
     INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
     INTEGER :: k,  kmin, kmax, dk
     DOUBLE PRECISION :: temp(1)
     DOUBLE COMPLEX :: ctemp(1)
     DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
 !
 ! DFT of splines at ypt0
     ALLOCATE(ft_fun2(kmin:kmax,1)) 
     ALLOCATE(coefs(kmin:kmax,ndim1))
     CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
     coefs = RESHAPE(carr, SHAPE(coefs))
 !
     temp = xpt
     DO k=kmin,kmax
        CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
        cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
     END DO
     cspec = cspec/REAL(n2,8)
 !
     DEALLOCATE(ft_fun2)
     DEALLOCATE(coefs)    
   END SUBROUTINE spectrum1
 !!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
 !
 !  DFT modes at xpt (at the initial ypt0)
 !
     USE fft
     USE bsplines
 !
     DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
 !
     TYPE(spline2d)               :: spl
     DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
     DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
 !
     INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
     INTEGER :: k,  kmin, kmax, dk
     DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
     DOUBLE COMPLEX, ALLOCATABLE ::   ft_fun(:)
     DOUBLE PRECISION :: temp(1)
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
 !
     ALLOCATE(ypt(0:n2-1))
     ALLOCATE(fun(1, 0:n2-1))
     ALLOCATE(ft_fun(0:n2-1))
 !
 !  Function values at points ypt
 !
     ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
     temp = xpt
     CALL gridval(spl, temp, ypt, fun, (/0,0/))
     ft_fun = fun(1,:)
 !
 !  Discrete Fourier Transform
 !
     CALL fourcol(ft_fun, 1)
     DO k=kmin,kmax
        IF(k.LT.0) THEN
           cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
        ELSE
           cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
        END IF
     END DO
     cspec = cspec/REAL(n2,8)
 !
     DEALLOCATE(ypt)
     DEALLOCATE(fun)
     DEALLOCATE(ft_fun)
   END SUBROUTINE spectrum2
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_sym_pardiso_dft_mod
 PROGRAM main
   USE pde2d_sym_pardiso_dft_mod
   USE futils
   USE fft
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
   INTEGER :: n_mode_couplings
   INTEGER, ALLOCATABLE :: mode_couplings(:)
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
   INTEGER :: k, kp, ik
   DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE COMPLEX, ALLOCATABLE   ::  crhs(:), crhs_r(:), csol(:), csol_r(:)
   TYPE(spline2d) :: splxy
   TYPE(zpardiso_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_sym_pardiso_dft.h5'
   INTEGER :: fid
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
   INTEGER :: nits=100
   LOGICAL :: nlmetis, nlforce_zero, nlpos
 !
   DOUBLE PRECISION :: xpt, ypt0
   DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
   DOUBLE COMPLEX :: energy, energy_exact
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
        &            nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   kmin = -3           ! Minimum Fourier mode number
   kmax = 3            ! Maximum Fourier mode number
   mbess = 2           ! Exponent of differential problem
   epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   nlpos = .TRUE.         ! Matrix is positive definite
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !!
 !   Read table of mode couplings
 !
   READ(*,*) n_mode_couplings
   ALLOCATE(mode_couplings(n_mode_couplings))
   READ(*,*) mode_couplings
   WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Exact energy
 !
   energy_exact = 2.0d0*pi/REAL(2+mbess,8)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
   CALL attach(fid, '/', 'EPSI', epsi)
   CALL attach(fid, '/', 'KMIN', kmin)
   CALL attach(fid, '/', 'KMAX', kmax)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
        &          (/.FALSE., .TRUE./))
 !
 !   Init DFT for spline in 2nd direction
 !
   CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
   dk = splxy%sp2%dft%dk
 !
 !   FE matrix assembly
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   nrank = (nx+nidbas(1))*dk       ! Rank of restricted matrix
   nrank_full = (nx+nidbas(1))*ny  ! Rank of full matrix
 !
   ALLOCATE(rhs(nrank_full), sol(nrank_full))
   ALLOCATE(crhs(nrank_full), csol(nrank_full))
   ALLOCATE(crhs_r(nrank), csol_r(nrank))
 !
   WRITE(*,'(a,i8)') 'nrank_full', nrank_full
   WRITE(*,'(a,i8)') 'nrank     ', nrank
 !
   t0 = seconds()
   CALL init(nrank, nterms, mat, nlherm=.TRUE.)
   CALL dismat(splxy, epsi, mat)
   ALLOCATE(arr(nrank))
   ALLOCATE(srow(nrank))
   DO i=1,nrank
      CALL getrow(mat, i, arr)
      srow(i) = SUM(arr)
   END DO
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
   PRINT*, 'Sum of mat before BC', SUM(srow)
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, splxy%sp2%dft)
   tmat = seconds() - t0
   DO i=1,nrank
      CALL getrow(mat, i, arr)
      srow(i) = SUM(arr)
   END DO
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
   PRINT*, 'Sum of mat after BC', SUM(srow)
 !
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !
 !   RHS assembly
 !
   CALL disrhs(mbess, epsi, splxy, rhs)
 !
 !   Init FFT
 !
   t0 = seconds()
   CALL fourcol(crhs(1:ny),1)
   CALL fourcol(crhs(1:ny),-1)
   tfour0 = seconds()-t0
   crhs = crhs/REAL(ny,8)
 !
 !  DFT of RHS
 !
   t0 = seconds()
   crhs = rhs
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      CALL fourcol(crhs(i0+1:i0+ny), 1)
   END DO
   tfour = seconds()-t0
 !
 !   Restriction in Fourier space
 !     k  = kmin:kmax  (restricted)
 !     kp = 0:ny-1     (full)
 !
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      i0_r = (i-1)*dk
      DO k=kmin,kmax
         kp = k
         IF(kp.LT.0) kp = kp+ny
         crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
      END DO
   END DO
 !
 !   BC on RHS
 !
   CALL ibcrhs(crhs_r, splxy%sp2%dft)
 !
   IF(nrank.LT.100) THEN
      WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
      WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
   END IF
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
 !   Matrix factorization
 !
   t0 = seconds()
 !!$  CALL factor(mat, nlmetis=nlmetis)
   CALL to_mat(mat)
   CALL reord_mat(mat, nlmetis=nlmetis);  CALL putmat(fid, '/MAT1', mat)
   CALL numfact(mat)
   tfact = seconds() - t0
   DO i=1,nrank
      CALL getrow(mat, i, arr)
      srow(i) = SUM(arr)
   END DO
   PRINT*, 'Sum of mat after factor', SUM(srow)
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
   WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
 !   Backsolve
 !
   t0 = seconds()
   PRINT*, 'SUM of crhs_r', SUM(crhs_r)
   CALL bsolve(mat, crhs_r, csol_r, debug=.FALSE.)
   WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
   tsolv = seconds() - t0
   PRINT*, 'SUM of csol_r', SUM(csol_r)
 !
   CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
   CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
 !===========================================================================
 !              4.0 Perform some diagnostics in Fourier space
 !
 !   Fourier spectrum at xpt
 !
   xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
   ALLOCATE(cspec0(dk))
   CALL spectrum0(splxy, csol_r, xpt, cspec0)
   WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
   END DO
 !
   ypt0 = 0.0d0
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
   CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
   END DO
   ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
   CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
   END DO
 !
 !   Spectral energy
 !
   WRITE(*,'(/a)') 'Spectral energies'
   ALLOCATE(energy_k(kmin:kmax))
   energy_k = (0.0d0,0.0d0)
   DO i=1,dimx
      i0_r = (i-1)*dk
      DO k=kmin,kmax
         ik = i0_r+k-kmin+1
         energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
      END DO
   END DO
   energy_k = energy_k/REAL(ny,8)
   energy = SUM(energy_k)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
   END DO
   WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
 !
   CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
 !===========================================================================
 !              5.0 Transform back to real space
 !
 !   Expand to full Fourier space
 !     k  = kmin:kmax  (restricted)
 !     kp = 0:ny-1     (full)
 !
   crhs = (0.0d0,0.0d0)
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      i0_r = (i-1)*dk
      DO k=kmin,kmax
         kp = k
         IF(kp.LT.0) kp = kp+ny
         csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
      END DO
   END DO
 !
 !   Fourier transform back to real space
 !
   t0 = seconds()
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      CALL fourcol(csol(i0+1:i0+ny),-1)
   END DO
   sol = REAL(csol)/REAL(ny,8)
   tfour = tfour + seconds()-t0
 !
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !
 !   Total energy
 !
   WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
        &     DOT_PRODUCT(rhs,sol), &
        &     DOT_PRODUCT(rhs,sol)-REAL(energy_exact)               
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
 !===========================================================================
 !              6.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
 !
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
 !    Fourier spectrum at xpt
 !
   xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
   ALLOCATE(cspec(dk))
 !
   ypt0 = 0.0d0
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
   CALL spectrum2(splxy, xpt, ypt0, cspec)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
   END DO
 !
   ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
   CALL spectrum2(splxy, xpt, ypt0, cspec)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
   END DO
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Init FFT time (s)             ', tfour0
   WRITE(*,'(a,1pe12.3)') 'FFT time (s)                  ', tfour
   WRITE(*,'(a,2f10.3)')  'Factor  Gflop/s', gflops1
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(cspec0, cspec)
   DEALLOCATE(mode_couplings)
   DEALLOCATE(xgrid, ygrid, rhs, sol)
   DEALLOCATE(crhs, csol)
   DEALLOCATE(crhs_r, csol_r)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   DEALLOCATE(srow)
   DEALLOCATE(energy_k)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !
   FUNCTION cnorm2(x)
     DOUBLE COMPLEX, INTENT(in) :: x(:)
     DOUBLE PRECISION :: cnorm2
     cnorm2 = SQRT(DOT_PRODUCT(x,x))    
   END FUNCTION cnorm2
 !
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_sym_wsmp.f90 b/examples/pde2d_sym_wsmp.f90
index 2d0c56e..8d44461 100644
--- a/examples/pde2d_sym_wsmp.f90
+++ b/examples/pde2d_sym_wsmp.f90
@@ -1,696 +1,696 @@
 !>
 !> @file pde2d_sym_wsmp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and WSMP symmetric matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_sym_wsmp_mod
   USE bsplines
   USE wsmp_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(wsmp_mat), INTENT(inout)    :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 DO it1=0,nidbas1  ! Test function in dir 1
                    igt1 = i+it1
                    DO it2=0,nidbas2  ! Test function in dir 2
                       igt2 = MODULO(j+it2-1, n2) + 1
                       jcol = igt2 + (igt1-1)*n2
 !-------------
                       contrib = 0.0d0
                       DO ig1=1,ng1
                          DO ig2=1,ng2
                             DO iterm=1,kterms
                                contrib = contrib + &
                                     &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                     &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                     &    coefs(iterm,ig1,ig2) *                 &
                                     &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                     &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                     &    wg1(ig1) * wg2(ig2)
                             END DO
                          END DO
                       END DO
                       CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     CALL putrow(mat, ny, zsum)
 !
 !   The away operator
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_sym_wsmp_mod
 PROGRAM main
   USE pde2d_sym_wsmp_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(wsmp_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_sym_wsmp.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlforce_zero, nlpos
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlpos, &
        &            nlforce_zero, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   nlpos = .TRUE.         ! Matrix is positive definite
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   WRITE(*,'(a,i8)') 'nrank', nrank
 !
   CALL init(nrank, nterms, mat, nlsym=.TRUE., nlpos=nlpos)
   CALL dismat(splxy, mat)
   ALLOCATE(arr(nrank))
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
   END IF
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
      WRITE(*,'(a)') 'Last rows'
      DO i=nrank-ny,nrank
         CALL getrow(mat, i, arr)
         WRITE(*,'(10(1pe12.3))') arr
      END DO
   END IF
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
 !
 !   The call to "factor" could be split into the
 !   3 following calls
 !
 !!$  CALL to_mat(mat)
 !!$  tconv = seconds() -t0
 !!$  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
 !!$  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
 !!$!
 !!$  t0 = seconds()
 !!$  CALL reord_mat(mat)
 !!$  CALL putmat(fid, '/MAT', mat)
 !!$  treord = seconds() - t0
 !!$!
 !!$  t0 = seconds()
 !!$  CALL numfact(mat)
   tfact = seconds() - t0
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
   WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
   CALL bsolve(mat, rhs, sol)
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(6)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
   tsolv = (seconds() - t0)/REAL(nits)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
   WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
   CALL numfact(mat)
   tfact = seconds()-t0
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_sym_wsmp_dft.f90 b/examples/pde2d_sym_wsmp_dft.f90
index f3226e9..01625ff 100644
--- a/examples/pde2d_sym_wsmp_dft.f90
+++ b/examples/pde2d_sym_wsmp_dft.f90
@@ -1,1039 +1,1039 @@
 !>
 !> @file pde2d_sym_wsmp_dft.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Ben McMillan <ben.mcmillan@epfl.ch>
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and WSMP symmetric matrix.
 !  The periodic coordinate y is discrete Fourier transformed.
 !
 !    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
 !     C(x,y) = 1 + \epsilon x cos(y)
 !     exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_sym_wsmp_dft_mod
   USE bsplines
   USE wsmp_bsplines
   IMPLICIT NONE
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, epsi, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     DOUBLE PRECISION, INTENT(in)     :: epsi
     TYPE(zwsmp_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: kmin, kmax, dk
     INTEGER :: i, j, ig1, ig2, kc
     INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
     DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun2(:,:,:), fft_temp(:)
     DOUBLE COMPLEX :: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER :: kcoupl         ! Number of mode couplings
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)')  'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
     WRITE(*,'(a, 5i6)')  'kmin, kmax, dk     =', kmin, kmax, dk
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
 !
 !   Weak form
 !
     kterms = mat%nterms
     kcoupl = SIZE(spl%sp2%dft%mode_couplings)
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
 !
 !   Splines and derivatives at all Gauss points
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1))    ! Spline and 1st derivative
     ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
     ALLOCATE(fft_temp(0:n2-1))           ! Used in coefeq
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
 !
 ! First interval in 2nd (periodic) coordinate
 !
     j = 1
     CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
     left2 = j
     CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
 
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO ig1=1,ng1
           DO ig2=1,ng2
              CALL coefeq(xg1(ig1), xg2(ig2), &
                   &      idert(:,:,ig1,ig2), &
                   &      iderw(:,:,ig1,ig2), &
                   &      coefs(:,:,ig1,ig2))
           END DO
        END DO
 !
        DO iw1=0,nidbas1     ! Weight function in dir 1
           igw1 = i+iw1
           DO it1=0,nidbas1  ! Test function in dir 1
              igt1 = i+it1
              DO mt=kmin,kmax ! Test Fourier mode
                 DO kc=1,kcoupl
                    mw = mt + spl%sp2%dft%mode_couplings(kc)
                    IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
 !-------------
                    contrib = (0.0d0, 0.0d0)
                    DO ig1=1,ng1
                       DO ig2=1,ng2
                          DO iterm=1,kterms
                             contrib = contrib + &
                               &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                               &    ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
                               &    coefs(iterm,kc,ig1,ig2) *                 &
                               &    CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
                               &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                               &    wg1(ig1) * wg2(ig2) /REAL(n2,8)
                          END DO
                       END DO
                    END DO
                    irow = (igw1-1)*dk + (mw-kmin)+1  ! Number first mode m then radial coord.
                    jcol = (igt1-1)*dk + (mt-kmin)+1
                    CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, ft_fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
     DEALLOCATE(fft_temp)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       USE fft
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE COMPLEX, INTENT(out) :: c(:,:)
 !
       DOUBLE PRECISION :: zcoef, dy
       INTEGER :: j, k, kc, kp
 !
 ! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
 !      C(x,y) = 1 + epsilon*x*cos(y)
 !
       dy = spl%sp2%dft%dx
       kc = SIZE(spl%sp2%dft%mode_couplings)
       DO j=0,n2-1
          fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
       END DO
       CALL fourcol(fft_temp,1)
       DO k=1,kc
          kp = spl%sp2%dft%mode_couplings(k)
          IF(kp.LT.0) kp=kp+n2
          c(1,k) = x*fft_temp(kp)
          c(2,k) = fft_temp(kp)/x
       END DO
 !!$      WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
 !!$      WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
 !!$      WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
 !
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, epsi, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in)           :: mbess
     DOUBLE PRECISION, INTENT(in)  :: epsi
     TYPE(spline2d), INTENT(in)    :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x, y, m)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(in)          :: m
 !
       DOUBLE PRECISION :: xm
       xm = REAL(m,8)
       rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
            &  epsi*x*( &
            &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
            &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
            &    ))
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, dft)
 !
 !   Apply BC on matrix
 !
     TYPE(zwsmp_mat), INTENT(inout) :: mat
     TYPE(dftmap), INTENT(in)          :: dft
     INTEGER :: nrank, k, kmin, kmax, dk, i
     DOUBLE COMPLEX :: arr(mat%rank)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     kmin = dft%kmin
     kmax = dft%kmax
     dk   = dft%dk
 !===========================================================================
 !              2.0 BC at the axis
 !
 !   zero for non-zero modes
 !
     DO k=kmin,kmax
        IF(k.NE.0) THEN
           i = k-kmin+1
           arr = 0.0d0; arr(i) = 1.0d0
           CALL putrow(mat, i, arr)
        END IF
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO i = nrank, nrank-dk+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, dft)
 !
 !   Apply BC on RHS
 !
     DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
     TYPE(dftmap), INTENT(in)      :: dft
     INTEGER :: nrank, kmin, kmax, dk, k
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
     kmin = dft%kmin
     kmax = dft%kmax
     dk = dft%dk
 !===========================================================================
 !              2.0 BC at the axis
 !
 !   zero for non-zero modes
 !
     DO k=kmin,kmax
        IF(k.NE.0) rhs(k-kmin+1) = 0.0
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-dk+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE spectrum0(spl, carr, xpt, cspec)
 !
 !  DFT modes at xpt (integration on the first interval)
 !
     DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
     DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
     TYPE(spline2d), INTENT(in)   :: spl
     DOUBLE COMPLEX, INTENT(in)   :: carr(:)
     DOUBLE PRECISION, INTENT(in) :: xpt
     DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
 !
     INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
     INTEGER :: k,  kmin, kmax, dk, kk
     INTEGER :: ng2, ig2
     INTEGER, ALLOCATABLE :: left2(:)
     DOUBLE PRECISION :: temp(1)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
     DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
 !
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(left2(ng2))
     ALLOCATE(xg2(ng2), wg2(ng2))
     ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
     ALLOCATE(psi(kmin:kmax))
 !
 !  Integration over first interval
 !
     left2 = 1
     CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
     CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
     psi = (0.0d0,0.0d0)
     DO k=kmin,kmax
        DO ig2=1,ng2
           psi(k)  = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
        END DO
     END DO
 !
     ALLOCATE(coefs(dk,ndim1))
     coefs = RESHAPE(carr, SHAPE(coefs))
     temp = xpt
     DO kk=kmin,kmax
        k=kk-kmin+1
        coefs(k,:) = psi(kk)*coefs(k,:)
        CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
     END DO
     cspec = cspec/(2.0d0*pi)
 !
     DEALLOCATE(left2)
     DEALLOCATE(xg2, wg2)
     DEALLOCATE(ft_fun2)
     DEALLOCATE(psi)
     DEALLOCATE(coefs)    
   END SUBROUTINE spectrum0
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
 !
 !  DFT modes at xpt (at the initial ypt0)
 !
     DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
 !
     TYPE(spline2d), INTENT(in)   :: spl
     DOUBLE COMPLEX, INTENT(in)   :: carr(:)
     DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
     DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
 !
     INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
     INTEGER :: k,  kmin, kmax, dk
     DOUBLE PRECISION :: temp(1)
     DOUBLE COMPLEX :: ctemp(1)
     DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
 !
 ! DFT of splines at ypt0
     ALLOCATE(ft_fun2(kmin:kmax,1)) 
     ALLOCATE(coefs(kmin:kmax,ndim1))
     CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
     coefs = RESHAPE(carr, SHAPE(coefs))
 !
     temp = xpt
     DO k=kmin,kmax
        CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
        cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
     END DO
     cspec = cspec/REAL(n2,8)
 !
     DEALLOCATE(ft_fun2)
     DEALLOCATE(coefs)    
   END SUBROUTINE spectrum1
 !!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
 !
 !  DFT modes at xpt (at the initial ypt0)
 !
     USE fft
     USE bsplines
 !
     DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
 !
     TYPE(spline2d)               :: spl
     DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
     DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
 !
     INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
     INTEGER :: k,  kmin, kmax, dk
     DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
     DOUBLE COMPLEX, ALLOCATABLE ::   ft_fun(:)
     DOUBLE PRECISION :: temp(1)
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     kmin = spl%sp2%dft%kmin
     kmax = spl%sp2%dft%kmax
     dk   = spl%sp2%dft%dk
 !
     ALLOCATE(ypt(0:n2-1))
     ALLOCATE(fun(1, 0:n2-1))
     ALLOCATE(ft_fun(0:n2-1))
 !
 !  Function values at points ypt
 !
     ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
     temp = xpt
     CALL gridval(spl, temp, ypt, fun, (/0,0/))
     ft_fun = fun(1,:)
 !
 !  Discrete Fourier Transform
 !
     CALL fourcol(ft_fun, 1)
     DO k=kmin,kmax
        IF(k.LT.0) THEN
           cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
        ELSE
           cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
        END IF
     END DO
     cspec = cspec/REAL(n2,8)
 !
     DEALLOCATE(ypt)
     DEALLOCATE(fun)
     DEALLOCATE(ft_fun)
   END SUBROUTINE spectrum2
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_sym_wsmp_dft_mod
 PROGRAM main
   USE pde2d_sym_wsmp_dft_mod
   USE futils
   USE fft
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
   INTEGER :: n_mode_couplings
   INTEGER, ALLOCATABLE :: mode_couplings(:)
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
   INTEGER :: k, kp, ik
   DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE COMPLEX, ALLOCATABLE   ::  crhs(:), crhs_r(:), csol(:), csol_r(:)
   TYPE(spline2d) :: splxy
   TYPE(zwsmp_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_sym_wsmp_dft.h5'
   INTEGER :: fid
   DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
   INTEGER :: nits=100
   LOGICAL :: nlmetis, nlforce_zero, nlpos
 !
   DOUBLE PRECISION :: xpt, ypt0
   DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
   DOUBLE COMPLEX :: energy, energy_exact
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
        &            nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   kmin = -3           ! Minimum Fourier mode number
   kmax = 3            ! Maximum Fourier mode number
   mbess = 2           ! Exponent of differential problem
   epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   nlpos = .TRUE.         ! Matrix is positive definite
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !!
 !   Read table of mode couplings
 !
   READ(*,*) n_mode_couplings
   ALLOCATE(mode_couplings(n_mode_couplings))
   READ(*,*) mode_couplings
   WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Exact energy
 !
   energy_exact = 2.0d0*pi/REAL(2+mbess,8)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
   CALL attach(fid, '/', 'EPSI', epsi)
   CALL attach(fid, '/', 'KMIN', kmin)
   CALL attach(fid, '/', 'KMAX', kmax)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
        &          (/.FALSE., .TRUE./))
 !
 !   Init DFT for spline in 2nd direction
 !
   CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
   dk = splxy%sp2%dft%dk
 !
 !   FE matrix assembly
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   nrank = (nx+nidbas(1))*dk       ! Rank of restricted matrix
   nrank_full = (nx+nidbas(1))*ny  ! Rank of full matrix
 !
   ALLOCATE(rhs(nrank_full), sol(nrank_full))
   ALLOCATE(crhs(nrank_full), csol(nrank_full))
   ALLOCATE(crhs_r(nrank), csol_r(nrank))
 !
   WRITE(*,'(a,i8)') 'nrank_full', nrank_full
   WRITE(*,'(a,i8)') 'nrank     ', nrank
 !
   t0 = seconds()
   CALL init(nrank, nterms, mat, nlherm=.TRUE., nlpos=nlpos)
   CALL dismat(splxy, epsi, mat)
   ALLOCATE(arr(nrank))
   ALLOCATE(srow(nrank))
   DO i=1,nrank
      CALL getrow(mat, i, arr)
      srow(i) = SUM(arr)
   END DO
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
   PRINT*, 'Sum of mat before BC', SUM(srow)
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, splxy%sp2%dft)
   tmat = seconds() - t0
   DO i=1,nrank
      CALL getrow(mat, i, arr)
      srow(i) = SUM(arr)
   END DO
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
 !!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
   PRINT*, 'Sum of mat after BC', SUM(srow)
 !
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !
 !   RHS assembly
 !
   CALL disrhs(mbess, epsi, splxy, rhs)
 !
 !   Init FFT
 !
   t0 = seconds()
   CALL fourcol(crhs(1:ny),1)
   CALL fourcol(crhs(1:ny),-1)
   tfour0 = seconds()-t0
   crhs = crhs/REAL(ny,8)
 !
 !  DFT of RHS
 !
   t0 = seconds()
   crhs = rhs
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      CALL fourcol(crhs(i0+1:i0+ny), 1)
   END DO
   tfour = seconds()-t0
 !
 !   Restriction in Fourier space
 !     k  = kmin:kmax  (restricted)
 !     kp = 0:ny-1     (full)
 !
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      i0_r = (i-1)*dk
      DO k=kmin,kmax
         kp = k
         IF(kp.LT.0) kp = kp+ny
         crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
      END DO
   END DO
 !
 !   BC on RHS
 !
   CALL ibcrhs(crhs_r, splxy%sp2%dft)
 !
   IF(nrank.LT.100) THEN
      WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
      WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
   END IF
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
 !   Matrix factorization
 !
   t0 = seconds()
 !!$  CALL factor(mat)
   CALL to_mat(mat)
   CALL reord_mat(mat);  CALL putmat(fid, '/MAT1', mat)
   CALL numfact(mat)
   tfact = seconds() - t0
   DO i=1,nrank
      CALL getrow(mat, i, arr)
      srow(i) = SUM(arr)
   END DO
   PRINT*, 'Sum of mat after factor', SUM(srow)
   PRINT*, 'iparm(64) after factor', mat%p%iparm(64)
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
   WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
 !   Backsolve
 !
   t0 = seconds()
   PRINT*, 'SUM of crhs_r', SUM(crhs_r)
   CALL bsolve(mat, crhs_r, csol_r)
   tsolv = seconds() - t0
   WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
   PRINT*, 'SUM of csol_r', SUM(csol_r)
   PRINT*, 'iparm(64) after bsolve', mat%p%iparm(64)
   PRINT*, 'Residue from WSMP', mat%p%dparm(7)
   WRITE(*,'(a/(20i4))') 'iparm', mat%p%iparm
 !
   CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
   CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
 !===========================================================================
 !              4.0 Perform some diagnostics in Fourier space
 !
 !   Fourier spectrum at xpt
 !
   xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
   ALLOCATE(cspec0(dk))
   CALL spectrum0(splxy, csol_r, xpt, cspec0)
   WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
   END DO
 !
   ypt0 = 0.0d0
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
   CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
   END DO
   ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
   CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
   END DO
 !
 !   Spectral energy
 !
   WRITE(*,'(/a)') 'Spectral energies'
   ALLOCATE(energy_k(kmin:kmax))
   energy_k = (0.0d0,0.0d0)
   DO i=1,dimx
      i0_r = (i-1)*dk
      DO k=kmin,kmax
         ik = i0_r+k-kmin+1
         energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
      END DO
   END DO
   energy_k = energy_k/REAL(ny,8)
   energy = SUM(energy_k)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
   END DO
   WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
 !
   CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
 !===========================================================================
 !              5.0 Transform back to real space
 !
 !   Expand to full Fourier space
 !     k  = kmin:kmax  (restricted)
 !     kp = 0:ny-1     (full)
 !
   crhs = (0.0d0,0.0d0)
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      i0_r = (i-1)*dk
      DO k=kmin,kmax
         kp = k
         IF(kp.LT.0) kp = kp+ny
         csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
      END DO
   END DO
 !
 !   Fourier transform back to real space
 !
   t0 = seconds()
   DO i=1,nx+nidbas(1)
      i0 = (i-1)*ny
      CALL fourcol(csol(i0+1:i0+ny),-1)
   END DO
   sol = REAL(csol)/REAL(ny,8)
   tfour = tfour + seconds()-t0
 !
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !
 !   Total energy
 !
   WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
        &     DOT_PRODUCT(rhs,sol), &
        &     DOT_PRODUCT(rhs,sol)-REAL(energy_exact)               
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
 !===========================================================================
 !              6.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
 !
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
 !    Fourier spectrum at xpt
 !
   xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
   ALLOCATE(cspec(dk))
 !
   ypt0 = 0.0d0
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
   CALL spectrum2(splxy, xpt, ypt0, cspec)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
   END DO
 !
   ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
   WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
   CALL spectrum2(splxy, xpt, ypt0, cspec)
   DO k=kmin,kmax
      WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
   END DO
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Init FFT time (s)             ', tfour0
   WRITE(*,'(a,1pe12.3)') 'FFT time (s)                  ', tfour
   WRITE(*,'(a,2f10.3)')  'Factor  Gflop/s', gflops1
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(cspec0, cspec)
   DEALLOCATE(mode_couplings)
   DEALLOCATE(xgrid, ygrid, rhs, sol)
   DEALLOCATE(crhs, csol)
   DEALLOCATE(crhs_r, csol_r)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   DEALLOCATE(srow)
   DEALLOCATE(energy_k)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !
   FUNCTION cnorm2(x)
     DOUBLE COMPLEX, INTENT(in) :: x(:)
     DOUBLE PRECISION :: cnorm2
     cnorm2 = SQRT(DOT_PRODUCT(x,x))    
   END FUNCTION cnorm2
 !
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde2d_wsmp.f90 b/examples/pde2d_wsmp.f90
index 6a20630..f227e92 100644
--- a/examples/pde2d_wsmp.f90
+++ b/examples/pde2d_wsmp.f90
@@ -1,711 +1,711 @@
 !>
 !> @file pde2d_wsmp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and WSMP non-symmetric matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_wsmp_mod
   USE bsplines
   USE wsmp_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(wsmp_mat), INTENT(inout)    :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 DO it1=0,nidbas1  ! Test function in dir 1
                    igt1 = i+it1
                    DO it2=0,nidbas2  ! Test function in dir 2
                       igt2 = MODULO(j+it2-1, n2) + 1
                       jcol = igt2 + (igt1-1)*n2
 !-------------
                       contrib = 0.0d0
                       DO ig1=1,ng1
                          DO ig2=1,ng2
                             DO iterm=1,kterms
                                contrib = contrib + &
                                     &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                     &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                     &    coefs(iterm,ig1,ig2) *                 &
                                     &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                     &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                     &    wg1(ig1) * wg2(ig2)
                             END DO
                          END DO
                       END DO
                       CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     zsum = 0.0d0
     DO j=1,ny
        arr = 0.0d0
        CALL getcol(mat, j, arr)
        zsum(ny:) = zsum(ny:) + arr(ny:)
     END DO
     CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
     DO j = 1,ny-1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_wsmp_mod
 PROGRAM main
   USE pde2d_wsmp_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(wsmp_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_wsmp.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlforce_zero
   LOGICAL :: nlserial
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
        &            nlforce_zero, nlserial, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   WRITE(*,'(a,i8)') 'nrank', nrank
 !
   CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
   CALL dismat(splxy, mat)
   ALLOCATE(arr(nrank))
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
   END IF
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
      WRITE(*,'(a)') 'Last rows'
      DO i=nrank-ny,nrank
         CALL getrow(mat, i, arr)
         WRITE(*,'(10(1pe12.3))') arr
      END DO
   END IF
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL to_mat(mat)
   tconv = seconds() -t0
   WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
   WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
 !
   t0 = seconds()
   CALL reord_mat(mat)
   CALL putmat(fid, '/MAT', mat)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat)
   tfact = seconds() - t0
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
   WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
   CALL bsolve(mat, rhs, sol)
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
   tsolv = (seconds() - t0)/REAL(nits)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
   WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
 !!$  CALL numfact(mat)
   CALL factor(mat, nlreord=.FALSE.)
   tfact = seconds()-t0
   gflops1 = mat%p%dparm(23) / tfact / 1.d9
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/examples/pde3d.f90 b/examples/pde3d.f90
index 9fed6e9..b60e046 100644
--- a/examples/pde3d.f90
+++ b/examples/pde3d.f90
@@ -1,396 +1,396 @@
 !>
 !> @file pde3d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 3d PDE using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
 !
   USE futils
   USE fft
   USE pde3d_mod
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
   INTEGER :: jder(3), it
   DOUBLE PRECISION :: pi, coefx(5)
   DOUBLE PRECISION :: dy, dz
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: xgrid, ygrid, zgrid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol
   DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE   :: crhs
 !
   TYPE(spline2d1d), TARGET :: splxyz
   TYPE(spline2d), POINTER  :: splxy
   TYPE(gbmat)              :: mat
 !
   CHARACTER(len=128) :: file='pde3d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   INTEGER :: nits=500
 !
   INTEGER, PARAMETER :: npart=10
   DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
 !
   INTEGER :: kmin, kmax
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass_shifted
 !
   NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nz = 8              ! Number of intervals in z
   nidbas = (/3,3,3/)  ! Degree of splines
   ngauss = (/4,4, 4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   npow = 2            ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
 !
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
 ! 
   dy = 2.d0*pi/REAL(ny,8)        ! Equidistant in y
   ygrid = (/ (j*dy, j=0,ny) /)
 !
   dz = 2.0d0*pi/REAL(nz,8)       ! Equidistant in z
   zgrid = (/ (k*dz, k=0,nz) /)
 !
   WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
   WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NZ', nz)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, &
        &          (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
   splxy => splxyz%sp12
 !
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
   WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots
 !
 !   2D FE matrix assembly (in plane x-y)
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
 !
   CALL init(kl, ku, nrank, nterms, mat)
   CALL dismat(splxy, mat)
 !!$  CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
   ALLOCATE(arr(nrank))
 !
 !   BC on Matrix
 !
   CALL ibcmat(mat, ny)
   tmat = seconds() - t0
 !
 !   3D RHS assembly
 !
   ALLOCATE(rhs(nrank,0:nz-1), sol(nrank,0:nz-1))
   CALL disrhs3(mbess, npow, splxyz, rhs)
 !
 !   FFT in z of RHS
 !
   ALLOCATE(crhs(nrank,0:nz-1))
   crhs = rhs
   CALL fourrow(crhs, 1)
   crhs = crhs/REAL(nz,8)
 !
 !   Apply Mass matrix to crhs
 !
   kmin =-nz/2
   kmax = nz/2-1
   CALL init_dft(splxyz%sp3, kmin, kmax)
   ALLOCATE(fftmass_shifted(kmin:kmax))
   ALLOCATE(fftmass(0:nz-1))
   CALL calc_fftmass(splxyz%sp3, fftmass_shifted)
   DO k=kmin,kmax
      fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k)
   END DO
   DO k=0,nz-1
      crhs(:,k) = crhs(:,k)/fftmass(k)
   END DO
 !
 !   Fourier transform back crhs to real space in z
 !
   CALL fourrow(crhs, -1)
   rhs(:,:) = REAL(crhs(:,:),8)
 !
 !   BC on RHS
 !
   CALL ibcrhs3(rhs, ny)
 !
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, rhs, sol)
 !
 !   Backtransform of solution
 !
   DO k=0,nz-1
      sol(1:ny-1,k) = sol(ny,k)
   END DO
   tsolv = seconds() - t0
   gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !
 !   Spline coefficients, taking into account of periodicity in y and z
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   dimz = splxyz%sp3%dim
   WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         DO k=0,dimz-1
            kk = MODULO(k,nz)
            bcoef(i,j,k) = sol(ij,kk)
         END DO
      END DO
   END DO
   CALL putarr(fid, '/BCOEF', bcoef, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   WRITE(*,'(/a)') 'Check with analytical solutions ...'
   CALL RANDOM_NUMBER(xp)
   yp=0.0d0
   zp=0.0d0
   jder = (/0,0,0/)
   CALL gridval(splxyz, xp, yp, zp, fp_calc, jder, bcoef)
 !!$  WRITE(*,'(4a12)') 'X', 'CALC', 'ANAL', 'ERROR'
 !!$  DO i=1,npart
 !!$     fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
 !!$          &        * COS(mbess*yp(i)) * COS(zp(i))**npow
 !!$     WRITE(*,'(4(1pe12.3))') xp(i), fp_calc(i), fp_anal(i), fp_calc(i)-fp_anal(i)
 !!$  END DO
 !
   ALLOCATE(solcal(0:nx,0:ny,0:nz))
   ALLOCATE(solana(0:nx,0:ny,0:nz))
   ALLOCATE(errsol(0:nx,0:ny,0:nz))
   DO i=0,nx
      DO j=0,ny
         DO k=0,nz
            solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
                 &        * COS(mbess*ygrid(j)) * COS(zgrid(k))**npow
         END DO
      END DO
   END DO
 !
   jder = (/0,0,0/)
   CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
   tgrid = seconds()-t0
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/zgrid', zgrid, '\phi')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
 !
 !   Check derivative d/dx
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nz
            IF( mbess .EQ. 0 ) THEN
               solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k))**npow
            ELSE
               solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
                    &        xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
                    &        COS(zgrid(k))**npow
            END IF
         END DO
      END DO
   END DO
 !
   jder = (/1,0,0/)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   CALL putarr(fid, '/derivx_exact', solana)
   WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', norm2(errsol)/norm2(solana)
 !
 !   Check derivative d/dy
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nz
            solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
                 &        SIN(mbess*ygrid(j))* COS(zgrid(k))**npow
         END DO
      END DO
   END DO
 !
   jder = (/0,1,0/)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
   tgrid = tgrid + seconds()-t0
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   CALL putarr(fid, '/derivy_exact', solana)
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', norm2(errsol)/norm2(solana)
 !
 !   Check derivative d/dz
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nz
            solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
                 &        * COS(mbess*ygrid(j)) * COS(zgrid(k))**(npow-1) &
                 &        * SIN(zgrid(k))
         END DO
      END DO
   END DO
 !
   jder = (/0,0,1/)
   t0 = seconds()
   IF(nlppform) THEN
      CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
   ELSE
      CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef)
   END IF
   tgrid = tgrid + seconds()-t0
   CALL putarr(fid, '/derivz', solcal, 'd/dz of solutions')
   CALL putarr(fid, '/derivz_exact', solana)
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', norm2(errsol)/norm2(solana)
 !===========================================================================
 !              9.0  Epilogue
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
   WRITE(*,'(a,1pe12.3)') 'gridval time (s)              ', tgrid
   WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !
   DEALLOCATE(xgrid, ygrid, zgrid)
   DEALLOCATE(rhs, sol)
   DEALLOCATE(crhs)
   DEALLOCATE(fftmass)
   DEALLOCATE(fftmass_shifted)
   DEALLOCATE(bcoef)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(arr)
   CALL destroy_sp(splxyz)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           DO k=1,SIZE(x,3)
              sum2 = sum2 + x(i,j,k)**2
           END DO
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
diff --git a/examples/pde3d_mod.f90 b/examples/pde3d_mod.f90
index 79c3edf..20de074 100644
--- a/examples/pde3d_mod.f90
+++ b/examples/pde3d_mod.f90
@@ -1,397 +1,397 @@
 !>
 !> @file pde3d_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pde3d_mod
   USE bsplines
   USE matrix
   IMPLICIT NONE
 !
 CONTAINS
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in) :: spl
     TYPE(gbmat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
 !
     ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1)) ! 
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
                 DO iterm=1,kterms
                    DO iw1=0,nidbas1  ! Weight function in dir 1
                       igw1 = i+iw1
                       DO iw2=0,nidbas2  ! Weight function in dir 2
                          igw2 = MODULO(j+iw2-1, n2) + 1
                          irow = igw2 + (igw1-1)*n2
                          DO it1=0,nidbas1  ! Test function in dir 1
                             igt1 = i+it1
                             DO it2=0,nidbas2  ! Test function in dir 2
                                igt2 = MODULO(j+it2-1, n2) + 1
                                jcol = igt2 + (igt1-1)*n2
                                contrib = fun1(iw1,iderw(iterm,1)) * &
                                     &    fun2(iw2,iderw(iterm,2)) * &
                                     &    coefs(iterm) *             &
                                     &    fun2(it2,idert(iterm,2)) * &
                                     &    fun1(it1,idert(iterm,1)) * &
                                     &    wg1(ig1) * wg2(ig2)
                                CALL updtmat(mat, irow, jcol, contrib)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs3(mbess, npow, spl, rhs)
 !
 !   Assembly the RHS using 3d spline spl
 !
     INTEGER, INTENT(in)           :: mbess, npow
     TYPE(spline2d1d), TARGET      :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
 !
     TYPE(spline1d), POINTER :: sp1, sp2, sp3
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: n3, nidbas3, ndim3, ng3
     INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     sp1 => spl%sp12%sp1
     sp2 => spl%sp12%sp2
     sp3 => spl%sp3
 !
     CALL get_dim(sp1, ndim1, n1, nidbas1)
     CALL get_dim(sp2, ndim2, n2, nidbas2) 
     CALL get_dim(sp3, ndim3, n3, nidbas3) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(sp1, ng1)
     CALL get_gauss(sp2, ng2)
     CALL get_gauss(sp3, ng3)
     WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
     ALLOCATE(xg3(ng3), wg3(ng3))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs,1)
     rhs(1:nrank,1:n3) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), sp2, fun2, j)
                 DO k=1,n3
                    CALL get_gauss(sp3, ng3, k, xg3, wg3)
                    DO ig3=1,ng3
                       CALL basfun(xg3(ig3), sp3, fun3, k)
                       contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
                            &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
                       DO k1=0,nidbas1
                          i1 = i+k1
                          DO k2=0,nidbas2
                             j2 = MODULO(j+k2-1,n2) + 1
                             ij = j2 + (i1-1)*n2
                             DO k3=0,nidbas3
                                kk = MODULO(k+k3-1,n3) + 1
                                rhs(ij,kk) = rhs(ij, kk) + &
                                     &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(xg3, wg3, fun3)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
       DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
       INTEGER, INTENT(in) :: m, n
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
     END FUNCTION rhseq
   END SUBROUTINE disrhs3
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     DOUBLE PRECISION, INTENT(in) :: c(5)
     INTEGER, INTENT(iN) :: nx
     DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
     INTEGER :: nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(gbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: kl, ku, nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     kl = mat%kl
     ku = mat%ku
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        DO j=1,ny+ku
           zsum(j) = zsum(j) + arr(j)
        END DO
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     zsum = 0.0d0
     DO j=1,ny
        arr = 0.0d0
        CALL getcol(mat, j, arr)
        DO i=ny,ny+kl
           zsum(i) = zsum(i) + arr(i)
        END DO
     END DO
     CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
     DO j = 1,ny-1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
     !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs3(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, nz, k
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
     nz = SIZE(rhs,2)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     DO k=1,nz
        zsum = SUM(rhs(1:ny,k))
        rhs(ny,k) = zsum
        rhs(1:ny-1,k) = 0.0d0
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO k=1,nz
        rhs(nrank-ny+1:nrank,k) = 0.0d0
     END DO
   END SUBROUTINE ibcrhs3
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde3d_mod
diff --git a/examples/poisson_mumps.f90 b/examples/poisson_mumps.f90
index 941a715..f495bd9 100644
--- a/examples/poisson_mumps.f90
+++ b/examples/poisson_mumps.f90
@@ -1,169 +1,169 @@
 !>
 !> @file poisson_mumps.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
   USE mumps_bsplines
   USE cds
 !
   IMPLICIT NONE
   INCLUDE 'mpif.h'
   TYPE(mumps_mat) :: amat
   TYPE(cds_mat) :: amat_cds
   DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
   INTEGER :: nx=5, ny=4
   INTEGER :: n
   INTEGER :: i, j, irow
   INTEGER :: ierr, me
   INTEGER, ALLOCATABLE :: dists(:)
   DOUBLE PRECISION :: t0
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
      READ(*,*) nx, ny
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   n = nx*ny  ! Rank of the matrix
   ALLOCATE(rhs(n))
   ALLOCATE(sol(n))
   ALLOCATE(arow(n))
 !
   WRITE(*,'(/a)') 'Mumps using CSR mat ...'
   CALL init(n, 1, amat)
 !
 !  Construct the matrix and RHS
 !
   t0 = mpi_wtime(0)
   DO j=1,ny
      DO i=1,nx
         arow = 0.0d0
         irow = numb(i,j)
         arow(irow) = 4.0d0
         IF(i.GT.1)  arow(numb(i-1,j)) = -1.0d0
         IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
         IF(j.GT.1)  arow(numb(i,j-1)) = -1.0d0
         IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
         CALL putrow(amat, irow, arow)
         rhs(irow) = SUM(arow)   ! => the exact solution is 1
      END DO
   END DO
 !
   WRITE(*,'(a,i6)') 'Rank of matrix', n
   WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
   WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0
 !
 !  Factor the amat matrix (Reordering, symbolic and numerical factorization)
 !
   t0 = mpi_wtime(0)
   CALL factor(amat, nlmetis=.TRUE.)
   sol=rhs
   CALL bsolve(amat, sol)
   WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
   END IF
   CALL destroy(amat)
 !
 !   CDS matrix
 !
   WRITE(*,'(/a)') 'Mumps using CDS mat ...'
   IF(ALLOCATED(dists)) DEALLOCATE(dists)
   ALLOCATE(dists(-2:2))
   dists = [-nx, -1, 0, 1, nx]
   WRITE(*,'(a/(20i4))') 'dists used in INIT =', dists  
   CALL init(n, dists, 1, amat_cds)
 !
   t0 = mpi_wtime(0)
   DO j=1,ny
      DO i=1,nx
         arow = 0.0d0
         irow = numb(i,j)
         amat_cds%val(irow,0) = 4.0d0
         IF(i.GT.1)  amat_cds%val(irow,-1) = -1.0d0
         IF(i.LT.nx) amat_cds%val(irow,+1) = -1.0d0
         IF(j.GT.1)  amat_cds%val(irow,-2) = -1.0d0
         IF(j.LT.ny) amat_cds%val(irow,+2) = -1.0d0
      END DO
   END DO
   WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0
 !
 !   Compute dists of amat
   PRINT*, 'stat of mata%mat', ASSOCIATED(amat%mat)
   PRINT*, 'rank of mata', amat%mat%rank
   CALL mstruct(amat%mat, dists)
   WRITE(*,'(A/(20i4))') 'dists from MSTRUCT=', dists  
 !
   t0 = mpi_wtime(0)
   CALL cds2mumps(amat_cds, amat)
   CALL factor(amat, debug=.FALSE.)
   sol = rhs
   CALL bsolve(amat, sol)
   WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
   END IF
 !
 !   Clean up
 !
   DEALLOCATE(rhs)
   DEALLOCATE(sol)
   DEALLOCATE(arow)
   CALL destroy(amat)
   CALL mpi_finalize(ierr)
 CONTAINS
   SUBROUTINE mstruct(mat, dists)
     TYPE(spmat), INTENT(in) :: mat
     INTEGER, ALLOCATABLE, INTENT(inout) :: dists(:)
     TYPE(elt), POINTER :: t
     INTEGER :: n, i, j0
     j0 = LBOUND(dists,1)
     n = mat%rank
     PRINT*, 'rank of mat', n
     DO i=1,n                    ! scan the matrix rows   
        t => mat%row(i)%row0
        DO WHILE(ASSOCIATED(t))  ! walk thru the linked list row(i)
           j = t%index
           IF(ABS(t%val) .LE. EPSILON(0.0d0)) THEN
              dists(j0) = t%index-i ! distance from main diag
              j0 = j0+1
           END IF
           t => t%next
        END DO
     END DO
   END SUBROUTINE mstruct
   INTEGER FUNCTION numb(i,j)
 !
 !  One-dimensional numbering
 !  Number first x then y
 !
     INTEGER, INTENT(in) :: i, j
     numb = (j-1)*nx + i
   END FUNCTION numb
 END PROGRAM main
diff --git a/examples/poisson_petsc.f90 b/examples/poisson_petsc.f90
index 341afba..87dde81 100644
--- a/examples/poisson_petsc.f90
+++ b/examples/poisson_petsc.f90
@@ -1,218 +1,218 @@
 !>
 !> @file poisson_petsc.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
   USE petsc_bsplines
   IMPLICIT NONE
   TYPE(petsc_mat) :: amat
   DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
   INTEGER :: nx=5, ny=4, ntrials=10
   INTEGER :: nitmax=10000, nits
   DOUBLE PRECISION :: rtol=1.e-9
   INTEGER :: n, nnz, nnz_loc
   INTEGER :: i, j, irow, jcol
   INTEGER :: ierr, me, npes, istart, iend
   DOUBLE PRECISION :: t0
   INTEGER :: ncols, cols(5)   !  Max nnz by row   .LE. 5
 !
   CHARACTER(len=128) :: matfile='mat.dat', rhsfile='rhs.dat'
   LOGICAL :: file_exist
 !
   NAMELIST /newrun/ nx, ny, nitmax, rtol, matfile, rhsfile
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
 !
   IF(me.EQ.0) THEN
      READ(*, newrun)
      WRITE(*, newrun)
      WRITE(*,'(a,i6)') 'npes =', npes
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(rhsfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
 !
   n = nx*ny  ! Rank of the matrix
 !
 !   Initialize matrix
 !
   CALL init(n, 1, amat, comm=MPI_COMM_WORLD)
   istart = amat%istart
   iend   = amat%iend
 !!$  WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend', istart, iend
 !
 !
   INQUIRE(file=TRIM(matfile), exist=file_exist)
 !
   IF( file_exist ) THEN
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      t0 = mpi_wtime()
      CALL load_mat(amat, matfile)
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. loading time (s)', mpi_wtime()-t0
   ELSE
 !
 !  Construct the matrix
 !
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      t0 = mpi_wtime()
      ALLOCATE(arow(5))   !  Max nnz per row .LE. 5
      DO j=1,ny
         DO i=1,nx
            irow = numb(i,j)
            IF( irow.GE.istart .AND. irow.LE.iend) THEN
               ncols = 1; cols(ncols) = irow; arow(ncols) = 4.0d0
               IF(i.GT.1)  THEN 
                  ncols = ncols+1
                  cols(ncols) = numb(i-1,j); arow(ncols) = -1.0d0
               END IF
               IF(i.LT.nx) THEN
                  ncols = ncols+1
                  cols(ncols) = numb(i+1,j); arow(ncols) = -1.0d0
               END IF
               IF(j.GT.1)  THEN
                  ncols = ncols+1
                  cols(ncols) = numb(i,j-1); arow(ncols) = -1.0d0
               END IF
               IF(j.LT.ny) THEN 
                  ncols = ncols+1
                  cols(ncols) = numb(i,j+1); arow(ncols) = -1.0d0
               END IF
               CALL putrow(amat, irow, arow(1:ncols), cols(1:ncols))
            END IF
         END DO
      END DO
      DEALLOCATE(arow)
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. construction time (s)', mpi_wtime()-t0
 !
 !   Convert to PETSC mat
 !
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      t0=mpi_wtime()
      CALL to_mat(amat)
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. conversion time (s)', mpi_wtime()-t0
 !
      CALL save_mat(amat, matfile)
   END IF
 !
 !  Matrix size and partition could have changed after loading from file!
 !
   n = amat%rank
   istart = amat%istart
   iend   = amat%iend
 !
   nnz_loc = get_count(amat)
   CALL mpi_reduce(nnz_loc, nnz, 1, MPI_INTEGER, mpi_sum, 0, MPI_COMM_WORLD, ierr)
   IF(npes.LE.4) THEN
      WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend (after), nloc, nnz_loc', &
           &   istart, iend, iend-istart+1, nnz_loc
   END IF
 !
 !  Construct or read  RHS
 !
   CALL mpi_barrier(MPI_COMM_WORLD,ierr)
   t0 = mpi_wtime()
   ALLOCATE(rhs(n))
   INQUIRE(file=TRIM(rhsfile), exist=file_exist)
   IF( file_exist ) THEN
      OPEN(unit=99, file=TRIM(rhsfile), status='old', form='unformatted')
      READ(99) rhs(1:n)
      CLOSE(99)
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      IF(me.EQ.0) THEN
         WRITE(*,'(a,1pe12.3)') 'RHS read time (s)', mpi_wtime()-t0
      END IF
   ELSE
      rhs = 0.0d0
      ALLOCATE(arow(n))
      DO i=istart, iend 
         arow = 0.0d0
         CALL getrow(amat, i, arow)
         rhs(i) = SUM(arow)   ! => the exact solution is 1
      END DO
      arow = rhs
      CALL mpi_allreduce(arow, rhs, n, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr)
      DEALLOCATE(arow)
      IF( me.EQ.0 ) THEN   ! All processes have the gobla rhs
         OPEN(unit=99, file=TRIM(rhsfile), status='new', form='unformatted')
         WRITE(99) rhs(1:n)
         CLOSE(99)
      END IF
      CALL mpi_barrier(MPI_COMM_WORLD,ierr)
      IF(me.EQ.0) THEN
         WRITE(*,'(a,1pe12.3)') 'RHS construction time (s)', mpi_wtime()-t0
      END IF
   END IF
   CLOSE(99)
   CALL mpi_barrier(MPI_COMM_WORLD,ierr)
 !
 !  Back solve
 ! 
   ALLOCATE(sol(n))
 !
   CALL mpi_barrier(MPI_COMM_WORLD,ierr)
   sol = 0.0d0
   t0=mpi_wtime()
   CALL bsolve(amat, rhs, sol, rtol, nitmax, nits)
   CALL mpi_barrier(MPI_COMM_WORLD,ierr)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', &
           &   MAXVAL(ABS(sol-1.0d0)), nits, mpi_wtime()-t0
   END IF
   CALL mpi_barrier(MPI_COMM_WORLD,ierr)
   t0=mpi_wtime()
   DO i=1,ntrials
      sol = 0.0d0
      CALL bsolve(amat, rhs, sol, rtol, nitmax, nits)
   END DO
   CALL mpi_barrier(MPI_COMM_WORLD,ierr)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', &
           &   MAXVAL(ABS(sol-1.0d0)), nits, (mpi_wtime()-t0)/REAL(ntrials)
   END IF
 !
 !   Clean up
 !
   DEALLOCATE(rhs)
   DEALLOCATE(sol)
   CALL destroy(amat)
   CALL PetscFinalize(ierr)
   CALL mpi_finalize(ierr)
 CONTAINS
   INTEGER FUNCTION numb(i,j)
 !
 !  One-dimensional numbering
 !  Number first x then y
 !
     INTEGER, INTENT(in) :: i, j
     numb = (j-1)*nx + i
   END FUNCTION numb
 END PROGRAM main
diff --git a/examples/ppde3d.f90 b/examples/ppde3d.f90
index 00e4f06..6a895e5 100644
--- a/examples/ppde3d.f90
+++ b/examples/ppde3d.f90
@@ -1,510 +1,510 @@
 !>
 !> @file ppde3d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 3d PDE using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
 !
   USE futils
   USE fft
   USE pputils2, ONLY : pptransp
   USE ppde3d_mod
 !
   IMPLICIT NONE
 !
   CHARACTER(len=128) :: infile="ppde3d.in"
   INTEGER :: l
   INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
   INTEGER :: startz, endz, nzloc
   INTEGER :: start_rank, end_rank, nrank_loc
   LOGICAL :: nlppform
   INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
   INTEGER :: jder(3), it
   DOUBLE PRECISION :: pi, coefx(5)
   DOUBLE PRECISION :: dy, dz
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: xgrid, ygrid, zgrid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t
   DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE   :: crhs_t
 !
   TYPE(spline2d1d), TARGET :: splxyz
   TYPE(spline2d), POINTER  :: splxy
   TYPE(spline1d)           :: splz
   TYPE(gbmat)              :: mat
 !
   CHARACTER(len=128) :: file='ppde3d.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   INTEGER :: nits=500
 !
   INTEGER, PARAMETER :: npart=10000000
   INTEGER :: nploc
   DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
   DOUBLE PRECISION zsuml, zsumg, errnorm2
 !
   NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
 !===========================================================================
 !              1.0 Prologue
 !
 !   Init MPI
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Get input file name from command argument
 !
   IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN
      CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr)
   END IF
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nz = 8              ! Number of intervals in z
   nidbas = (/3,3,3/)  ! Degree of splines
   ngauss = (/4,4, 4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   npow = 2            ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   OPEN(unit=99, file=TRIM(infile), status='old', action='read')
   READ(99,newrun)
   IF( me.EQ.0) THEN
      WRITE(*,newrun)
   END IF
   CLOSE(99)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
 !
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
 ! 
   dy = 2.d0*pi/REAL(ny,8)        ! Equidistant in y
   ygrid = (/ (j*dy, j=0,ny) /)
 !
 !   Partitionned toroidal grid z
 !
   dz = 2.0d0*pi/REAL(nz,8)       ! Equidistant in z
   zgrid = (/ (k*dz, k=0,nz) /)
   CALL dist1d(0, nz, startz, nzloc)
   endz = startz+nzloc
 !!$  PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc
 !
   IF( me.EQ.0) THEN
      WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
      WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
      WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
   END IF
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', &
        &      mpicomm=MPI_COMM_WORLD)
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NZ', nz)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
   CALL attach(fid, '/', 'MBESS', mbess)
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi')
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), &
        &          splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
   splxy => splxyz%sp12
 !
   IF( me.EQ.0) THEN
      WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
      WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
   END IF
   CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD)
 !
 !   2D FE matrix assembly (in plane x-y)
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   IF(me.EQ.0) THEN
      WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
   END IF
 !
   CALL init(kl, ku, nrank, nterms, mat)
   CALL dismat(splxy, mat)
   ALLOCATE(arr(nrank))
 !
 !   BC on Matrix
 !
   CALL ibcmat(mat, ny)
   tmat = seconds() - t0
 !
 !   3D RHS assembly
 !
   ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1
   ALLOCATE(sol(nrank,0:nzloc-1))
   CALL disrhs3(mbess, npow, splxyz, rhs)
 !
   zsuml = SUM(ABS(rhs(:,0:nzloc-1)))
   CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
        &          MPI_COMM_WORLD, ierr)
   IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg
 !
 !   FFT in z of RHS
 !
   CALL dist1d(1, nrank, start_rank, nrank_loc)
   end_rank = start_rank+nrank_loc-1
   ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc))
 !
   CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t)
   crhs_t = rhs_t
   CALL fourcol(crhs_t,1)
   crhs_t = crhs_t/REAL(nz,8)
 !
 !   Apply Mass matrix to crhs
 !
   PRINT*, 4
   CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.)
   ALLOCATE(fftmass(0:nz-1))
   PRINT*, 5
 !!$  CALL calc_fftmass(splz, fftmass)
   CALL calc_fftmass_old(splz, fftmass)
   IF(me.EQ.0) THEN
      WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass
   END IF
   DO k=0,nz-1
      crhs_t(k,:) = crhs_t(k,:)/fftmass(k)
   END DO
 !
 !   Fourier transform back crhs to real space in z
 !
   CALL fourcol(crhs_t, -1)
   rhs_t(:,:) = REAL(crhs_t(:,:),8)
   CALL pptransp(MPI_COMM_WORLD, rhs_t, sol)  ! Put the final RHS in SOL
 !
 !   BC on RHS
 !
   CALL ibcrhs3(sol, ny)
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
   END IF
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, sol)
 !
 !   Backtransform of solution
 !
   DO k=0,nzloc-1
      sol(1:ny-1,k) = sol(ny,k)
   END DO
 !
   zsuml = SUM(ABS(sol))
   CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
        &          MPI_COMM_WORLD, ierr)
   IF(me.EQ.0) PRINT*, 'sum of sol', zsumg
 !
   tsolv = seconds() - t0
   gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
 !
 !   Spline coefficients, taking into account of periodicity in y and z
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   dimz = splxyz%sp3%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
 !
 !   Get 3D array of spline coefs.
 !
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         DO k=0,nzloc-1
            bcoef(i,j,k) = sol(ij,k)
         END DO
      END DO
   END DO
 !
 !   Get missing coefs from remote guard cells
 !
   prev = MODULO(me-1,npes)
   next = MODULO(me+1,npes)
   count = dimx*dimy
   DO i=0,nidbas(3)-1
      CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, &
           &            bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, &
           &            MPI_COMM_WORLD, status, ierr)
   END DO
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
   END IF
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   CALL RANDOM_NUMBER(xp)
   CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp
   CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp
   nploc = 0
   DO i=1,npart
      IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN
         nploc = nploc+1
         xp(nploc) = xp(i)
         yp(nploc) = yp(i)
         zp(nploc) = zp(i)
      END IF
   END DO
   jder = (/0,0,0/)
   CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef)
   DO i=1,nploc
      fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
           &        * COS(mbess*yp(i)) * COS(zp(i))**npow
   END DO
   errnorm2 =  norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc))
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', &
           &    errnorm2
   END IF
 !
   ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1))
   ALLOCATE(solana(0:nx,0:ny,0:nzloc-1))
   ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1))
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            kk=startz+k
            solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
                 &        * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow
         END DO
      END DO
   END DO
 !
   jder = (/0,0,0/)
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   tgrid = seconds()-t0
   errsol = solana - solcal
 !
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
           &    errnorm2
   END IF
   CALL putarr(fid, '/sol', solcal,pardim=3)
   CALL putarr(fid, '/solana', solana,pardim=3)
 !
 !   Check derivative d/dx
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            IF( mbess .EQ. 0 ) THEN
               solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow
            ELSE
               solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
                    &        xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
                    &        COS(zgrid(k+startz))**npow
            END IF
         END DO
      END DO
   END DO
 !
   jder = (/1,0,0/)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2
   END IF
   CALL putarr(fid, '/derivx', solcal, pardim=3)
   CALL putarr(fid, '/derivx_exact', solana,pardim=3)
 !
 !   Check derivative d/dy
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
                 &        SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow
         END DO
      END DO
   END DO
 !
   jder = (/0,1,0/)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2
   END IF
   CALL putarr(fid, '/derivy', solcal, pardim=3)
   CALL putarr(fid, '/derivy_exact', solana,pardim=3)
 !
 !   Check derivative d/dz
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
                 &        * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) &
                 &        * SIN(zgrid(k+startz))
         END DO
      END DO
   END DO
 !
   jder = (/0,0,1/)
   t0 = seconds()
   IF(nlppform) THEN
      CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   ELSE
      CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
   END IF
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2
   END IF
   CALL putarr(fid, '/derivz', solcal, pardim=3)
   CALL putarr(fid, '/derivz_exact', solana,pardim=3)
 !===========================================================================
 !              9.0  Epilogue
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(/a)') '---'
      WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
      WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
      WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
      WRITE(*,'(a,1pe12.3)') 'gridval time (s)              ', tgrid
      WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
      WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
   END IF
 !
   DEALLOCATE(xgrid, ygrid, zgrid)
   DEALLOCATE(fftmass)
   DEALLOCATE(rhs)
   DEALLOCATE(sol)
   DEALLOCATE(rhs_t)
   DEALLOCATE(crhs_t)
   DEALLOCATE(bcoef)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(arr)
   CALL destroy_sp(splxyz)
   CALL destroy_sp(splz)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !
   CALL mpi_finalize(ierr)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
     DOUBLE PRECISION :: sum2, sum2g
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           DO k=1,SIZE(x,3)
              sum2 = sum2 + x(i,j,k)**2
           END DO
        END DO
     END DO
     CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
          &          MPI_COMM_WORLD, ierr)
     norm2 = SQRT(sum2g)
   END FUNCTION norm2
 !
   FUNCTION norm21(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm21
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2, sum2g
     INTEGER :: i, j
 !
     sum2 = DOT_PRODUCT(x,x)
     CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
          &          MPI_COMM_WORLD, ierr)
     norm21 = SQRT(sum2g)
   END FUNCTION norm21
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
diff --git a/examples/ppde3d_mod.f90 b/examples/ppde3d_mod.f90
index c04c788..f4f7627 100644
--- a/examples/ppde3d_mod.f90
+++ b/examples/ppde3d_mod.f90
@@ -1,473 +1,473 @@
 !>
 !> @file ppde3d_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE ppde3d_mod
   USE bsplines
   USE matrix
   IMPLICIT NONE
   INCLUDE "mpif.h"
 !
   INTEGER :: me, npes
   INTEGER :: prev, next
   INTEGER :: count, status(MPI_STATUS_SIZE), ierr
 !
 CONTAINS
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in) :: spl
     TYPE(gbmat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
 !
     ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1)) ! 
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
                 DO iterm=1,kterms
                    DO iw1=0,nidbas1  ! Weight function in dir 1
                       igw1 = i+iw1
                       DO iw2=0,nidbas2  ! Weight function in dir 2
                          igw2 = MODULO(j+iw2-1, n2) + 1
                          irow = igw2 + (igw1-1)*n2
                          DO it1=0,nidbas1  ! Test function in dir 1
                             igt1 = i+it1
                             DO it2=0,nidbas2  ! Test function in dir 2
                                igt2 = MODULO(j+it2-1, n2) + 1
                                jcol = igt2 + (igt1-1)*n2
                                contrib = fun1(iw1,iderw(iterm,1)) * &
                                     &    fun2(iw2,iderw(iterm,2)) * &
                                     &    coefs(iterm) *             &
                                     &    fun2(it2,idert(iterm,2)) * &
                                     &    fun1(it1,idert(iterm,1)) * &
                                     &    wg1(ig1) * wg2(ig2)
                                CALL updtmat(mat, irow, jcol, contrib)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs3(mbess, npow, spl, rhs)
 !
 !   Assembly the RHS using 3d spline spl
 !
     INTEGER, INTENT(in)           :: mbess, npow
     TYPE(spline2d1d), TARGET      :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
 !
     TYPE(spline1d), POINTER :: sp1, sp2, sp3
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: n3, nidbas3, ndim3, ng3
     INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: buf(:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     sp1 => spl%sp12%sp1
     sp2 => spl%sp12%sp2
     sp3 => spl%sp3
 !
     CALL get_dim(sp1, ndim1, n1, nidbas1)
     CALL get_dim(sp2, ndim2, n2, nidbas2) 
     CALL get_dim(sp3, ndim3, n3, nidbas3) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(sp1, ng1)
     CALL get_gauss(sp2, ng2)
     CALL get_gauss(sp3, ng3)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
     ALLOCATE(xg3(ng3), wg3(ng3))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs,1)
     rhs = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), sp2, fun2, j)
                 DO k=1,n3
                    CALL get_gauss(sp3, ng3, k, xg3, wg3)
                    DO ig3=1,ng3
                       CALL basfun(xg3(ig3), sp3, fun3, k)
                       contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
                            &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
                       DO k1=0,nidbas1
                          i1 = i+k1
                          DO k2=0,nidbas2
                             j2 = MODULO(j+k2-1,n2) + 1
                             ij = j2 + (i1-1)*n2
                             DO k3=0,nidbas3
                                kk = k+k3
                                rhs(ij,kk) = rhs(ij, kk) + &
                                     &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !
 !     Update from remote guard cells
 !
     CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
     CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
     next = MODULO(me+1,npes)
     prev = MODULO(me-1,npes)
     count = nrank
     ALLOCATE(buf(nrank))
     DO i=nidbas3,1,-1
        CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, &
          &             buf, count, MPI_DOUBLE_PRECISION, prev, 0, &
          &             MPI_COMM_WORLD, status, ierr)
        rhs(:,i) =  rhs(:,i) + buf(:)
     END DO
     DEALLOCATE(buf)
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(xg3, wg3, fun3)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
       DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
       INTEGER, INTENT(in) :: m, n
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
     END FUNCTION rhseq
   END SUBROUTINE disrhs3
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     DOUBLE PRECISION, INTENT(in) :: c(5)
     INTEGER, INTENT(iN) :: nx
     DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
     INTEGER :: nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(gbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: kl, ku, nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     kl = mat%kl
     ku = mat%ku
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        DO j=1,ny+ku
           zsum(j) = zsum(j) + arr(j)
        END DO
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     zsum = 0.0d0
     DO j=1,ny
        arr = 0.0d0
        CALL getcol(mat, j, arr)
        DO i=ny,ny+kl
           zsum(i) = zsum(i) + arr(i)
        END DO
     END DO
     CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
     DO j = 1,ny-1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
     !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs3(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, nz, k
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
     nz = SIZE(rhs,2)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     DO k=1,nz
        zsum = SUM(rhs(1:ny,k))
        rhs(ny,k) = zsum
        rhs(1:ny-1,k) = 0.0d0
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO k=1,nz
        rhs(nrank-ny+1:nrank,k) = 0.0d0
     END DO
   END SUBROUTINE ibcrhs3
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dist1d(s0, ntot, s, nloc)
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in) :: s0, ntot
     INTEGER, INTENT(out) :: s, nloc
     INTEGER :: me, npes, ierr, naver, rem
 !
     CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
     CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
     naver = ntot/npes
     rem = MODULO(ntot,npes)
     s = s0 + MIN(rem,me) + me*naver
     nloc = naver
     IF( me.LT.rem ) nloc = nloc+1
   END SUBROUTINE dist1d
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disp(a, str, comm)
 !
 !   Gather partitionned 1d array to 0 and print it
 !
     INCLUDE 'mpif.h'
     DOUBLE PRECISION, INTENT(in) :: a(:)
     INTEGER, INTENT(in) :: comm
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER :: n, ntot, npes, me, ierr, i
     DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c
     INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs
 !
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_comm_size(comm, npes, ierr)
     n = SIZE(a)
     IF(me.EQ.0) THEN
        ALLOCATE(counts(npes), displs(npes+1))
     END IF
     CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr)
     IF(me.EQ.0) THEN
        displs(1) = 0
        DO i=2,npes+1
           displs(i) = displs(i-1)+counts(i-1)
        END DO
        ntot = displs(npes+1)
        ALLOCATE(c(ntot))
        c = 0.0d0
     END IF
     CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, &
          &           MPI_DOUBLE_PRECISION, 0, comm, ierr)    
     IF( me.EQ.0 ) THEN
        WRITE(*,'(/a)') TRIM(str)
        DO i=1,npes
           WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1,  &
                &        c(displs(i)+1:displs(i+1))
        END DO
        DEALLOCATE(c)
        DEALLOCATE(counts)
        DEALLOCATE(displs)
     END IF
   END SUBROUTINE disp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE ppde3d_mod
diff --git a/examples/ppde3d_pb.f90 b/examples/ppde3d_pb.f90
index 850056b..800226f 100644
--- a/examples/ppde3d_pb.f90
+++ b/examples/ppde3d_pb.f90
@@ -1,518 +1,518 @@
 !>
 !> @file ppde3d_pb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 3d PDE using splines:
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
 !
   USE futils
   USE fft
   USE pputils2, ONLY : pptransp
   USE ppde3d_pb_mod
 !
   IMPLICIT NONE
 !
   CHARACTER(len=128) :: infile="ppde3d_pb.in"
   INTEGER :: l
   INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
   INTEGER :: startz, endz, nzloc
   INTEGER :: start_rank, end_rank, nrank_loc
   LOGICAL :: nlppform
   INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
   INTEGER :: jder(3), it
   DOUBLE PRECISION :: pi, coefx(5)
   DOUBLE PRECISION :: dy, dz
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: xgrid, ygrid, zgrid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t
   DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE   :: crhs_t
 !
   TYPE(spline2d1d), TARGET :: splxyz
   TYPE(spline2d), POINTER  :: splxy
   TYPE(spline1d)           :: splz
   TYPE(pbmat)              :: mat
 !
   CHARACTER(len=128) :: file='ppde3d_pb.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   INTEGER :: nits=500
 !
   INTEGER, PARAMETER :: npart=10000000
   INTEGER :: nploc
   DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
   DOUBLE PRECISION zsuml, zsumg, errnorm2
 !
   INTEGER :: kmin, kmax
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass_shifted
 !
   NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
 !===========================================================================
 !              1.0 Prologue
 !
 !   Init MPI
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Get input file name from command argument
 !
   IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN
      CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr)
   END IF
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nz = 8              ! Number of intervals in z
   nidbas = (/3,3,3/)  ! Degree of splines
   ngauss = (/4,4, 4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   npow = 2            ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   OPEN(unit=99, file=TRIM(infile), status='old', action='read')
   READ(99,newrun)
   IF( me.EQ.0) THEN
      WRITE(*,newrun)
   END IF
   CLOSE(99)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
 !
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
 ! 
   dy = 2.d0*pi/REAL(ny,8)        ! Equidistant in y
   ygrid = (/ (j*dy, j=0,ny) /)
 !
 !   Partitionned toroidal grid z
 !
   dz = 2.0d0*pi/REAL(nz,8)       ! Equidistant in z
   zgrid = (/ (k*dz, k=0,nz) /)
   CALL dist1d(0, nz, startz, nzloc)
   endz = startz+nzloc
 !!$  PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc
 !
   IF( me.EQ.0) THEN
      WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
      WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
      WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
   END IF
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', &
        &      mpicomm=MPI_COMM_WORLD)
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NZ', nz)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
   CALL attach(fid, '/', 'MBESS', mbess)
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi')
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), &
        &          splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
   splxy => splxyz%sp12
 !
   IF( me.EQ.0) THEN
      WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
      WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
   END IF
   CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD)
 !
 !   2D FE matrix assembly (in plane x-y)
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
   ku = kl                      ! Number of super-diagnonals
   IF(me.EQ.0) THEN
      WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
   END IF
 !
   CALL init(ku, nrank, nterms, mat)
   CALL dismat(splxy, mat)
   ALLOCATE(arr(nrank))
 !
 !   BC on Matrix
 !
   CALL ibcmat(mat, ny)
   tmat = seconds() - t0
 !
 !   3D RHS assembly
 !
   ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1
   ALLOCATE(sol(nrank,0:nzloc-1))
   CALL disrhs3(mbess, npow, splxyz, rhs)
 !
   zsuml = SUM(ABS(rhs(:,0:nzloc-1)))
   CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
        &          MPI_COMM_WORLD, ierr)
   IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg
 !
 !   FFT in z of RHS
 !
   CALL dist1d(1, nrank, start_rank, nrank_loc)
   end_rank = start_rank+nrank_loc-1
   ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc))
 !
   CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t)
   crhs_t = rhs_t
   CALL fourcol(crhs_t,1)
   crhs_t = crhs_t/REAL(nz,8)
 !
 !   Apply Mass matrix to crhs
 !
   CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.)
   kmin =-nz/2
   kmax = nz/2-1
   CALL init_dft(splz, kmin, kmax)
   ALLOCATE(fftmass_shifted(kmin:kmax))
   ALLOCATE(fftmass(0:nz-1))
   CALL calc_fftmass_old(splz, fftmass_shifted)
   DO k=kmin,kmax
      fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k)
   END DO
   IF(me.EQ.0) THEN
      WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass
   END IF
   DO k=0,nz-1
      crhs_t(k,:) = crhs_t(k,:)/fftmass(k)
   END DO
 !
 !   Fourier transform back crhs to real space in z
 !
   CALL fourcol(crhs_t, -1)
   rhs_t(:,:) = REAL(crhs_t(:,:),8)
   CALL pptransp(MPI_COMM_WORLD, rhs_t, sol)  ! Put the final RHS in SOL
 !
 !   BC on RHS
 !
   CALL ibcrhs3(sol, ny)
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
   END IF
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL factor(mat)
   tfact = seconds() - t0
   gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
 
   t0 = seconds()
   CALL bsolve(mat, sol)
 !
 !   Backtransform of solution
 !
   DO k=0,nzloc-1
      sol(1:ny-1,k) = sol(ny,k)
   END DO
 !
   zsuml = SUM(ABS(sol))
   CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
        &          MPI_COMM_WORLD, ierr)
   IF(me.EQ.0) PRINT*, 'sum of sol', zsumg
 !
   tsolv = seconds() - t0
   gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
 !
 !   Spline coefficients, taking into account of periodicity in y and z
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   dimz = splxyz%sp3%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
 !
 !   Get 3D array of spline coefs.
 !
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         DO k=0,nzloc-1
            bcoef(i,j,k) = sol(ij,k)
         END DO
      END DO
   END DO
 !
 !   Get missing coefs from remote guard cells
 !
   prev = MODULO(me-1,npes)
   next = MODULO(me+1,npes)
   count = dimx*dimy
   DO i=0,nidbas(3)-1
      CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, &
           &            bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, &
           &            MPI_COMM_WORLD, status, ierr)
   END DO
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
   END IF
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   CALL RANDOM_NUMBER(xp)
   CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp
   CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp
   nploc = 0
   DO i=1,npart
      IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN
         nploc = nploc+1
         xp(nploc) = xp(i)
         yp(nploc) = yp(i)
         zp(nploc) = zp(i)
      END IF
   END DO
   jder = (/0,0,0/)
   CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef)
   DO i=1,nploc
      fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
           &        * COS(mbess*yp(i)) * COS(zp(i))**npow
   END DO
   errnorm2 =  norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc))
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', &
           &    errnorm2
   END IF
 !
   ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1))
   ALLOCATE(solana(0:nx,0:ny,0:nzloc-1))
   ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1))
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            kk=startz+k
            solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
                 &        * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow
         END DO
      END DO
   END DO
 !
   jder = (/0,0,0/)
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   tgrid = seconds()-t0
   errsol = solana - solcal
 !
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
           &    errnorm2
   END IF
   CALL putarr(fid, '/sol', solcal,pardim=3)
   CALL putarr(fid, '/solana', solana,pardim=3)
 !
 !   Check derivative d/dx
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            IF( mbess .EQ. 0 ) THEN
               solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow
            ELSE
               solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
                    &        xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
                    &        COS(zgrid(k+startz))**npow
            END IF
         END DO
      END DO
   END DO
 !
   jder = (/1,0,0/)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2
   END IF
   CALL putarr(fid, '/derivx', solcal, pardim=3)
   CALL putarr(fid, '/derivx_exact', solana,pardim=3)
 !
 !   Check derivative d/dy
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
                 &        SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow
         END DO
      END DO
   END DO
 !
   jder = (/0,1,0/)
   t0 = seconds()
   CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2
   END IF
   CALL putarr(fid, '/derivy', solcal, pardim=3)
   CALL putarr(fid, '/derivy_exact', solana,pardim=3)
 !
 !   Check derivative d/dz
 !
   DO i=0,nx
      DO j=0,ny
         DO k=0,nzloc-1
            solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
                 &        * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) &
                 &        * SIN(zgrid(k+startz))
         END DO
      END DO
   END DO
 !
   jder = (/0,0,1/)
   t0 = seconds()
   IF(nlppform) THEN
      CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
   ELSE
      CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
   END IF
   tgrid = tgrid + seconds()-t0
   errsol = solana - solcal
   errnorm2 =  norm2(errsol) / norm2(solana)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2
   END IF
   CALL putarr(fid, '/derivz', solcal, pardim=3)
   CALL putarr(fid, '/derivz_exact', solana,pardim=3)
 !===========================================================================
 !              9.0  Epilogue
 !
   IF(me.EQ.0) THEN
      WRITE(*,'(/a)') '---'
      WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
      WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
      WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
      WRITE(*,'(a,1pe12.3)') 'gridval time (s)              ', tgrid
      WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
      WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
   END IF
 !
   DEALLOCATE(xgrid, ygrid, zgrid)
   DEALLOCATE(fftmass)
   DEALLOCATE(fftmass_shifted)
   DEALLOCATE(rhs)
   DEALLOCATE(sol)
   DEALLOCATE(rhs_t)
   DEALLOCATE(crhs_t)
   DEALLOCATE(bcoef)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(arr)
   CALL destroy_sp(splxyz)
   CALL destroy_sp(splz)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !
   CALL mpi_finalize(ierr)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
     DOUBLE PRECISION :: sum2, sum2g
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           DO k=1,SIZE(x,3)
              sum2 = sum2 + x(i,j,k)**2
           END DO
        END DO
     END DO
     CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
          &          MPI_COMM_WORLD, ierr)
     norm2 = SQRT(sum2g)
   END FUNCTION norm2
 !
   FUNCTION norm21(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm21
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sum2, sum2g
     INTEGER :: i, j
 !
     sum2 = DOT_PRODUCT(x,x)
     CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
          &          MPI_COMM_WORLD, ierr)
     norm21 = SQRT(sum2g)
   END FUNCTION norm21
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
diff --git a/examples/ppde3d_pb_mod.f90 b/examples/ppde3d_pb_mod.f90
index 72c4917..6995b7a 100644
--- a/examples/ppde3d_pb_mod.f90
+++ b/examples/ppde3d_pb_mod.f90
@@ -1,452 +1,452 @@
 !>
 !> @file ppde3d_pb_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE ppde3d_pb_mod
   USE bsplines
   USE matrix
   IMPLICIT NONE
   INCLUDE "mpif.h"
 !
   INTEGER :: me, npes
   INTEGER :: prev, next
   INTEGER :: count, status(MPI_STATUS_SIZE), ierr
 !
 CONTAINS
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in) :: spl
     TYPE(pbmat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
 !
     ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1)) ! 
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
                 DO iterm=1,kterms
                    DO iw1=0,nidbas1  ! Weight function in dir 1
                       igw1 = i+iw1
                       DO iw2=0,nidbas2  ! Weight function in dir 2
                          igw2 = MODULO(j+iw2-1, n2) + 1
                          irow = igw2 + (igw1-1)*n2
                          DO it1=0,nidbas1  ! Test function in dir 1
                             igt1 = i+it1
                             DO it2=0,nidbas2  ! Test function in dir 2
                                igt2 = MODULO(j+it2-1, n2) + 1
                                jcol = igt2 + (igt1-1)*n2
                                contrib = fun1(iw1,iderw(iterm,1)) * &
                                     &    fun2(iw2,iderw(iterm,2)) * &
                                     &    coefs(iterm) *             &
                                     &    fun2(it2,idert(iterm,2)) * &
                                     &    fun1(it1,idert(iterm,1)) * &
                                     &    wg1(ig1) * wg2(ig2)
                                CALL updtmat(mat, irow, jcol, contrib)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs3(mbess, npow, spl, rhs)
 !
 !   Assembly the RHS using 3d spline spl
 !
     INTEGER, INTENT(in)           :: mbess, npow
     TYPE(spline2d1d), TARGET      :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
 !
     TYPE(spline1d), POINTER :: sp1, sp2, sp3
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: n3, nidbas3, ndim3, ng3
     INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: buf(:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     sp1 => spl%sp12%sp1
     sp2 => spl%sp12%sp2
     sp3 => spl%sp3
 !
     CALL get_dim(sp1, ndim1, n1, nidbas1)
     CALL get_dim(sp2, ndim2, n2, nidbas2) 
     CALL get_dim(sp3, ndim3, n3, nidbas3) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(sp1, ng1)
     CALL get_gauss(sp2, ng2)
     CALL get_gauss(sp3, ng3)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng2), wg2(ng2))
     ALLOCATE(xg3(ng3), wg3(ng3))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs,1)
     rhs = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), sp2, fun2, j)
                 DO k=1,n3
                    CALL get_gauss(sp3, ng3, k, xg3, wg3)
                    DO ig3=1,ng3
                       CALL basfun(xg3(ig3), sp3, fun3, k)
                       contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
                            &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
                       DO k1=0,nidbas1
                          i1 = i+k1
                          DO k2=0,nidbas2
                             j2 = MODULO(j+k2-1,n2) + 1
                             ij = j2 + (i1-1)*n2
                             DO k3=0,nidbas3
                                kk = k+k3
                                rhs(ij,kk) = rhs(ij, kk) + &
                                     &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !
 !     Update from remote guard cells
 !
     CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
     CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
     next = MODULO(me+1,npes)
     prev = MODULO(me-1,npes)
     count = nrank
     ALLOCATE(buf(nrank))
     DO i=nidbas3,1,-1
        CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, &
          &             buf, count, MPI_DOUBLE_PRECISION, prev, 0, &
          &             MPI_COMM_WORLD, status, ierr)
        rhs(:,i) =  rhs(:,i) + buf(:)
     END DO
     DEALLOCATE(buf)
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(xg3, wg3, fun3)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
       DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
       INTEGER, INTENT(in) :: m, n
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
     END FUNCTION rhseq
   END SUBROUTINE disrhs3
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
     DOUBLE PRECISION, INTENT(in) :: c(5)
     INTEGER, INTENT(iN) :: nx
     DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
     INTEGER :: nintg
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
     DOUBLE PRECISION :: a, b, dx, f0, f1, scal
     INTEGER :: i, k
 !
     a=x(0)
     b=x(nx)
     nintg = 10*nx
     ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
     dx = (b-a)/REAL(nintg)
     xint(0) = a
     fint(0) = 0.0d0
     f1 = fdist(xint(0))
     DO i=1,nintg
        f0 = f1
        xint(i) = xint(i-1) + dx
        f1 = fdist(xint(i))
        fint(i) = fint(i-1) + 0.5*(f0+f1)
     END DO
 !
 !  Normalization
 !
     scal = REAL(nx) / fint(nintg)
     fint(0:nintg) = fint(0:nintg) * scal
 !
 !  Obtain mesh point by (inverse) interpolation
 !
     k = 1
     DO i=1,nintg-1
        IF( fint(i) .GE. REAL(k) ) THEN
           x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
                &   (k-fint(i))
           k = k+1
        END IF
     END DO
 !
     DEALLOCATE(xint, fint)
   CONTAINS
     DOUBLE PRECISION FUNCTION fdist(x)
       DOUBLE PRECISION, INTENT(in) :: x
       fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
     END FUNCTION fdist
   END SUBROUTINE meshdist
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(pbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: kl, ku, nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     ku = mat%ku
     kl = ku
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        DO j=1,ny+ku
           zsum(j) = zsum(j) + arr(j)
        END DO
     END DO
 !
     zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     CALL putrow(mat, ny, zsum)
 !
 !   The away operator
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs3(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, nz, k
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
     nz = SIZE(rhs,2)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     DO k=1,nz
        zsum = SUM(rhs(1:ny,k))
        rhs(ny,k) = zsum
        rhs(1:ny-1,k) = 0.0d0
     END DO
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO k=1,nz
        rhs(nrank-ny+1:nrank,k) = 0.0d0
     END DO
   END SUBROUTINE ibcrhs3
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dist1d(s0, ntot, s, nloc)
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in) :: s0, ntot
     INTEGER, INTENT(out) :: s, nloc
     INTEGER :: me, npes, ierr, naver, rem
 !
     CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
     CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
     naver = ntot/npes
     rem = MODULO(ntot,npes)
     s = s0 + MIN(rem,me) + me*naver
     nloc = naver
     IF( me.LT.rem ) nloc = nloc+1
   END SUBROUTINE dist1d
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disp(a, str, comm)
 !
 !   Gather partitionned 1d array to 0 and print it
 !
     INCLUDE 'mpif.h'
     DOUBLE PRECISION, INTENT(in) :: a(:)
     INTEGER, INTENT(in) :: comm
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER :: n, ntot, npes, me, ierr, i
     DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c
     INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs
 !
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_comm_size(comm, npes, ierr)
     n = SIZE(a)
     IF(me.EQ.0) THEN
        ALLOCATE(counts(npes), displs(npes+1))
     END IF
     CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr)
     IF(me.EQ.0) THEN
        displs(1) = 0
        DO i=2,npes+1
           displs(i) = displs(i-1)+counts(i-1)
        END DO
        ntot = displs(npes+1)
        ALLOCATE(c(ntot))
        c = 0.0d0
     END IF
     CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, &
          &           MPI_DOUBLE_PRECISION, 0, comm, ierr)    
     IF( me.EQ.0 ) THEN
        WRITE(*,'(/a)') TRIM(str)
        DO i=1,npes
           WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1,  &
                &        c(displs(i)+1:displs(i+1))
        END DO
        DEALLOCATE(c)
        DEALLOCATE(counts)
        DEALLOCATE(displs)
     END IF
   END SUBROUTINE disp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE ppde3d_pb_mod
diff --git a/examples/tbasfun.f90 b/examples/tbasfun.f90
index 26bf500..cec95a8 100644
--- a/examples/tbasfun.f90
+++ b/examples/tbasfun.f90
@@ -1,137 +1,137 @@
 !>
 !> @file tbasfun.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test scalar and vector versions of def_basfun
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER :: nx, nidbas, nrank, npt, jdermx
   DOUBLE PRECISION :: dx
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
   DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :), vfun(:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: fun1(:, :), vfun1(:,:,:)
   DOUBLE PRECISION :: errfun
   INTEGER :: left, i, nerrs, k
   INTEGER, ALLOCATABLE :: vleft(:)
   LOGICAL :: nlper=.FALSE.
   TYPE(spline1d) :: splx
 !
   NAMELIST /newrun/ nx, nidbas, npt, jdermx, nlper
 !
 !===============================================================================
 !
 !   1D grid
 !
   nx = 10
   nidbas = 3
   npt = 1000000
   jdermx = 0
   READ(*,newrun)
   WRITE(*,newrun)
 
   ALLOCATE(xgrid(0:nx))
   dx = 1.0d0/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, 4, xgrid, splx, period=nlper)
   nrank = splx%dim
   WRITE(*,'(a, i5)') 'nrank =', nrank
   WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
 !
   IF(nx.LE.2) THEN
      WRITE(*,'(/a)') 'VAL0'
      DO i=1,nx
         WRITE(*,'(a,i3)') 'Interval', i
         DO k=1,nidbas+1 ! Spline number
            WRITE(*,'(10f12.4)') splx%val0(:,k,i)
         END DO
      END DO
      IF(nlper) THEN
         WRITE(*,'(/a)') 'VALC'
         DO k=1,nidbas+1 ! Spline number
            WRITE(*,'(10f12.4)') splx%valc(:,k)
         END DO
      END IF
   END IF
 !
   ALLOCATE(xpt(npt))
   ALLOCATE(vleft(npt))
   ALLOCATE(fun(0:nidbas,0:jdermx))  ! Values and derivatives of all Splines
   ALLOCATE(vfun(0:nidbas,0:jdermx,npt))
   ALLOCATE(fun1(0:nidbas,0:jdermx))  ! Values and derivatives of all Splines
   ALLOCATE(vfun1(0:nidbas,0:jdermx,npt))
   CALL RANDOM_NUMBER(xpt)
 !===============================================================================
 !
 !   Check def_basfun
 !
   CALL def_basfun(xpt, splx, vfun, vleft)
 !
   WRITE(*,'(/a)') 'vector def_basfun versus scalar def_basfun'
   WRITE(*,'(a6,a12, 2a6, a12)') 'i', 'x', 'left', 'vleft', 'Max. err'
   DO i=1,npt
      CALL def_basfun(xpt(i), splx, fun, left)
      errfun= MAXVAL(ABS(fun(:,:)-vfun(:,:,i)))
      WRITE(*,'(i6,1pe12.4,2i6,1pe12.4)') i, xpt(i), left, vleft(i), errfun
   END DO
 !
   IF(npt.LE.10) THEN
      WRITE(*,'(/a)')  'Scalar/vector basfun'
      DO i=1,npt
         CALL basfun(xpt(i), splx, fun, vleft(i)+1)
         WRITE(*,'(a,1pe12.4/10(1pe12.4))') 'x = ', xpt(i),  fun(:,:)
         WRITE(*,'(10(1pe12.4))') vfun(:,:,i)
      END DO
   END IF
 !===============================================================================
 !
 !   Check basfun
 !
   CALL basfun(xpt, splx, vfun1, vleft+1)
   WRITE(*,'(/a,1pe12.4)') 'vector basfun versus vector def_basun: Max err', &
        &    MAXVAL(ABS(vfun-vfun1))
 !
   WRITE(*,'(/a)') 'vector basfun versus scalar basfun'
   WRITE(*,'(a6,a12,a12)') 'i', 'x', 'Max. err'
   DO i=1,npt
      CALL basfun(xpt(i), splx, fun1, vleft(i)+1)
      errfun= MAXVAL(ABS(fun1(:,:)-vfun1(:,:,i)))
      WRITE(*,'(i6,1pe12.4,1pe12.4)') i, xpt(i), errfun
   END DO
 !!===============================================================================
 !
 !   Clean up
 !
   CALL destroy_sp(splx)
   DEALLOCATE(xgrid)
   DEALLOCATE(xpt)
   DEALLOCATE(vleft)
   DEALLOCATE(fun)
 END PROGRAM main
diff --git a/examples/tcdsmat.f90 b/examples/tcdsmat.f90
index f832abf..0e3f565 100644
--- a/examples/tcdsmat.f90
+++ b/examples/tcdsmat.f90
@@ -1,283 +1,283 @@
 !>
 !> @file tcdsmat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Solving the following 2d PDE using splines and iterative method
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
   USE tcdsmat_mod
   USE bsplines
   USE cds
   USE futils
 !
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   INTEGER                       :: nints(2), nidbas(2), ngauss(2), mbess, nterms
   DOUBLE PRECISION              :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:)
   DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:)
 
   INTEGER            :: mrank, bw0
   TYPE(spline2d)     :: splxy
   TYPE(cds_mat)      :: mat
   LOGICAL            :: readmat, verbose
   CHARACTER(len=128) :: file='tcdsmat.h5'
   CHARACTER(len=128) :: filein
   INTEGER            :: fid, fidin
   DOUBLE PRECISION   :: mem, seconds
   DOUBLE PRECISION   :: t0, tmat, tbal, tsolv, tgrid, tmumps(2)
   INTEGER            :: nitmx=100, niter, nssor
   DOUBLE PRECISION   :: rtolmx=1.0d-6, omega=0.0d0, resid
 !
   INTEGER                       :: i, j, ij, dimx, dimy
   DOUBLE PRECISION, ALLOCATABLE :: bcoef(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: solcal(:,:), solana(:,:), errsol(:,:)
   INTEGER, ALLOCATABLE          :: dists(:)
 !
   INTEGER :: ierr, me
   TYPE(mumps_mat) :: mat_mumps
 !
   NAMELIST /newrun/ nints, nidbas, ngauss, mbess, coefx, coefy, &
        &            nitmx, rtolmx, omega, nssor, readmat, verbose, filein
 !===========================================================================
 !              1.0     Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Read in data specific to run
 !
   WRITE(*,'(/a)') 'Prologue ...'
   readmat = .FALSE.   ! Read matrix and rhs from file
   filein = 'mat.h5'
   nints = (/8,8/)     ! Number of intervals in x, y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nitmx = 1000        ! Max number of iterations
   rtolmx = 1.e-12     ! Max relative tolerance
   nssor  = 1          ! Number of SSOR precond steps
   verbose = .FALSE.   ! Output residue at each iteration
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
 !
 !   Overwrite some input by reading in "filein" if required
 !
   IF(readmat) THEN
      CALL openf(filein, fidin, mode='r')
      CALL getatt(fidin, '/', 'NX', nints(1))
      CALL getatt(fidin, '/', 'NY', nints(2))
      CALL getatt(fidin, '/', 'NIDBAS1', nidbas(1))
      CALL getatt(fidin, '/', 'NIDBAS2', nidbas(2))
      CALL getatt(fidin, '/', 'NGAUSS1', ngauss(1))
      CALL getatt(fidin, '/', 'NGAUSS2', ngauss(2))
      CALL getatt(fidin, '/', 'MBESS', mbess)
   END IF
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nints(1)), ygrid(0:nints(2)))
   xgrid(0) = 0.0d0;   xgrid(nints(1)) = 1.0d0
   CALL meshdist(coefx, xgrid, nints(1))
   ygrid(0) = 0.0d0;   ygrid(nints(2)) = 2.d0*pi
   CALL meshdist(coefy, ygrid, nints(2))
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'TCDSMAT Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nints(1))
   CALL attach(fid, '/', 'NY', nints(2))
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0       Discretize the PDE
 !
 !   Set up spline
 !
   WRITE(*,'(/a)') 'Discretize the PDE ...'
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./) )
 !
 !   Set up CDS matrix for solver
 !
   CALL mstruct(nidbas, nints, mrank, dists)
   bw0=(nidbas(1)+1)*nints(2)   ! Half band including all zero diagonals
   CALL init(mrank, dists, nterms, mat, bw0=bw0)
   WRITE(*,'(a,4i8)') 'rank, kl, ku, bw0 = ', mat%rank, mat%kl, mat%ku, bw0
   WRITE(*,'(i4,a/(10i8))') mat%ndiags, ' diagonals:', mat%dists
 !
 !   FE matrix assembly and apply BC 
 !
   IF(readmat) THEN
      CALL getmat(fidin, '/MAT1', mat)
   ELSE
      CALL dismat(splxy, mat)
      CALL ibcmat(mat, nints(2), nidbas(1))
   END IF
 !
   WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
 !
 !   Assembly RHS and apply BC
 !
   ALLOCATE(rhs(mrank), sol(mrank))
 !
   IF(readmat) THEN
      CALL getarr(fidin, '/RHS', rhs)
   ELSE
      CALL disrhs(mbess, splxy, rhs)
      CALL ibcrhs(rhs, nints(2))
      CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   END IF
   tmat = seconds() - t0
 !===========================================================================
 !              3.0       Diagonal balance of matrix
 !
   WRITE(*,'(/a)') 'Diagonal balance of matrix ...'
   tbal = seconds()
   IF( .NOT. readmat ) THEN
      CALL diagbal(mat)
      CALL putmat(fid,'/MAT1', mat, 'CDS matrix with BC')
   END IF
   rhs = mat%bal * rhs
   tbal = seconds()-tbal
 !===========================================================================
 !              4.0       Analytical solutions
 !
   ALLOCATE(solcal(0:nints(1),0:nints(2)))
   ALLOCATE(solana(0:nints(1),0:nints(2)))
   ALLOCATE(errsol(0:nints(1),0:nints(2)))
   DO i=0,nints(1)
      DO j=0,nints(2)
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
 !===========================================================================
 !              5.0       Direct solve with MUMPS
 !
 !
   WRITE(*,'(/a)') 'Solve the linear system using MUMPS ...'
 !
   tmumps(1) = seconds()
   CALL cds2mumps(mat, mat_mumps)
   CALL factor(mat_mumps, debug=.FALSE.)
   tmumps(2) = seconds()
   sol = rhs
   CALL bsolve(mat_mumps, sol, debug=.FALSE.)
   sol = mat%bal * sol
   sol(1:nints(2)-1) = sol(nints(2))  ! Unicity
   tmumps(1:2) = seconds()-tmumps(1:2)
   
 !
   tgrid = seconds()
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
 !
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,nints(2)) + i*nints(2) + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef)
   errsol = solana - solcal
   tgrid = seconds() - tgrid
 !
   PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana)
 !!$  WRITE(*,'(a,2(1pe15.6))') 'Relative discretization errors', &
 !!$       &    norm2(errsol) / norm2(solana)
 !===========================================================================
 !              5.0       Solve the linear system using CG
 !
   WRITE(*,'(/a)') 'Solve the linear system using CG ...'
 !
   tsolv = seconds()
   sol(:) = 0.0d0         !  Initial guest for solution
   IF( nssor .EQ. 0 ) THEN
      CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, &
           &  verbose=verbose)
   ELSE
      CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, &
           &  verbose=verbose, nssor=nssor)
   END IF
   sol = mat%bal * sol
   sol(1:nints(2)-1) = sol(nints(2))  ! Unicity
   tsolv = seconds()-tsolv
 !
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,nints(2)) + i*nints(2) + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef)
   errsol = solana - solcal
 !
   PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana)
 !===========================================================================
 !              9.0  Epilogue
 !
   CALL putarr(fid, '/SOL', sol)
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)')    'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)')    'Matrice balancing time (s)    ', tbal
   WRITE(*,'(a,2(1pe12.3))') 'MUMPS solver time (s)         ', tmumps
   WRITE(*,'(a,1pe12.3)')    'Solution at grid time (s)     ', tgrid
   WRITE(*,'(a,i8,2(1pe12.3))') 'nits, resid, t(s)', niter, resid, tsolv
 !
   DEALLOCATE(xgrid,ygrid)
   DEALLOCATE(rhs, sol)
   DEALLOCATE(dists)
   DEALLOCATE(bcoef)
   DEALLOCATE(solcal,solana,errsol)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
   CALL closef(fid)
   IF(readmat) THEN
      CALL closef(fidin)
   END IF
 !
   CALL mpi_finalize(ierr)
 !===========================================================================
 END PROGRAM main
diff --git a/examples/tcdsmat_mod.f90 b/examples/tcdsmat_mod.f90
index 776e422..9be9434 100644
--- a/examples/tcdsmat_mod.f90
+++ b/examples/tcdsmat_mod.f90
@@ -1,635 +1,635 @@
 !>
 !> @file tcdsmat_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE tcdsmat_mod
   IMPLICIT NONE
 !
   INTERFACE
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   INTERFACE norm2
      MODULE PROCEDURE norm2_1d, norm2_2d
   END INTERFACE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mstruct(p, n, rank, dist)
 !
 !  It is assumed that:
 !    . 2nd dimension is number first
 !    . 2nd dimension is periodic
 !
     INTEGER, INTENT(in)  :: p(2), n(2)
     INTEGER, INTENT(out) :: rank
     INTEGER, ALLOCATABLE :: dist(:)
 !
     INTEGER, ALLOCATABLE :: pvect(:,:)
     INTEGER              :: kl, ku, i
 !
     rank = (n(1)+p(1))*n(2)    ! Rank of the FE matrix
     ku = (p(1)+1)*(2*p(2)+1)-1
     kl = ku
     ALLOCATE(pvect(0:2*p(2),0:p(1)))
     IF( ALLOCATED(dist)) DEALLOCATE(dist)
     ALLOCATE(dist(-kl:ku))
 !
 !   Upper (North) points
     pvect(0:p(2),0) = (/(i,i=0,p(2))/)
 !
 !   Lower (South) points and periodicity of 2nd dim.
     DO i=1,p(2)
        pvect(p(2)+i,0) = n(2)-pvect(p(2)-i+1,0)
     END DO
 !
 !  Shift by N2 for points on the right (West) side
     DO i=1,p(1)
        pvect(:,i) = pvect(:,i-1)+n(2)
     END DO
 !
 !   Super-diagonals including the diagonal
     dist(0:ku) = RESHAPE(pvect, (/ku+1/))
 !
 !   Sub-diagonals
     DO i=-1,-kl,-1
        dist(i) = -dist(-i)
     END DO
 !
     DEALLOCATE(pvect)
   END SUBROUTINE mstruct
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     USE bsplines
     USE cds
 !
     TYPE(spline2d), INTENT(in)   :: spl
     TYPE(cds_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
 !
     ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1)) ! 
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
  !===========================================================================
 !              2.0 Assembly loop
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
                 DO iterm=1,kterms
                    DO iw1=0,nidbas1  ! Weight function in dir 1
                       igw1 = i+iw1
                       DO iw2=0,nidbas2  ! Weight function in dir 2
                          igw2 = MODULO(j+iw2-1, n2) + 1
                          irow = igw2 + (igw1-1)*n2
                          DO it1=0,nidbas1  ! Test function in dir 1
                             igt1 = i+it1
                             DO it2=0,nidbas2  ! Test function in dir 2
                                igt2 = MODULO(j+it2-1, n2) + 1
                                jcol = igt2 + (igt1-1)*n2
                                contrib = fun1(iw1,iderw(iterm,1)) * &
                                     &    fun2(iw2,iderw(iterm,2)) * &
                                     &    coefs(iterm) *             &
                                     &    fun2(it2,idert(iterm,2)) * &
                                     &    fun1(it1,idert(iterm,1)) * &
                                     &    wg1(ig1) * wg2(ig2)
                                CALL updtmat(mat, irow, jcol, contrib)
                             END DO
                          END DO
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
 !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny, px)
 !
 !   Apply BC on matrix
 !
     USE cds
     IMPLICIT NONE
     TYPE(cds_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in)          :: ny, px
 !
     INTEGER :: kl, ku, n, bw0, i, j
     DOUBLE PRECISION :: zsum(mat%rank), arr(mat%rank)
 !===========================================================================
 !              1.0 Prologue
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     mat%ny = ny
 !
 !   Size of  row ny and column ny
 !
     bw0 = SIZE(mat%rowv)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !   Store the sums at row ny in MAT%ROWV
 !
     zsum(1:n) = 0.0d0
     DO i=1,ny
        CALL getrow(mat, i, arr)
        zsum(1:bw0) = zsum(1:bw0) + arr(1:bw0)
        IF( i .LE. ny ) THEN    ! Clear rows 1:(ny-1)
           arr = 0.0d0; arr(i) = 1.0d0
           CALL putrow(mat, i, arr)
        END IF
     END DO
     mat%rowv(1:bw0) = zsum(1:bw0)   ! row ny
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130)
 !
 !   The horizontal sum on the NY-th column
 !   The NY-th row of matrix was stored in mat%rowv
 !   Store the sums ar column ny at MAT%COLH
 !
     zsum(1:n) = 0.0d0
     DO j=1,ny
        CALL getcol(mat, j, arr)
        zsum(ny) = zsum(ny) + mat%rowv(j)
        zsum(ny+1:bw0) = zsum(ny+1:bw0) + arr(ny+1:bw0)
        IF( j .NE. ny ) THEN    ! Clear columns 1:(ny-1)
           mat%rowv(j) = 0.0d0
           arr = 0.0d0; arr(j) = 1.0d0
           CALL putcol(mat, j, arr)
        END IF
     END DO
     mat%rowv(ny) = 0.0d0   ! Its value is now in mat%colh(ny)
     arr = 0.0d0
     CALL putcol(mat, ny, arr)
     CALL putrow(mat, ny, arr)
     mat%colh(1:bw0) = zsum(1:bw0) ! column ny
 !
 !   Move the diagonal term from mat%colh back to main diagonal
 !
     CALL putele(mat, ny, ny, mat%colh(ny))
     mat%colh(ny) = 0.0d0
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130)
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%colh(1:130)
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%val(1:ny,0)
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = n, n-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = n, n-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'diag 1', mat%val(n-ny-1:n,1)
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'diag 0', mat%val(n-ny:n,0)
 !!$    WRITE(*,'(/a,/(10(f8.3)))') 'diag -1', mat%val(n-ny-1:n,-1)
 !===========================================================================
 !              9.0  Epilogue
 !
   END SUBROUTINE ibcmat
 !===========================================================================
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     USE bsplines
     INTEGER, INTENT(in)           :: mbess
     TYPE(spline2d), INTENT(in)    :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
 !
     INTEGER                       :: n1, nidbas1, ndim1, ng1
     INTEGER                       :: n2, nidbas2, ndim2, ng2
     INTEGER                       :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in)              :: ny
 !
     INTEGER          :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !===========================================================================
   SUBROUTINE cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, nit, verbose, &
        &        nssor)
 !
 !   Preconditionned Conjugate Gradient solver
 !
     USE cds
     TYPE(cds_mat)                           :: mat
     DOUBLE PRECISION, INTENT(in)            :: rhs(:), omega, rtolmx
     INTEGER, INTENT(in)                     :: nitmx
     DOUBLE PRECISION, INTENT(inout)         :: sol(:)
     DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid
     INTEGER, OPTIONAL, INTENT(out)          :: nit
     INTEGER, OPTIONAL, INTENT(in)           :: nssor
     LOGICAL, OPTIONAL, INTENT(in)           :: verbose
 !
     DOUBLE PRECISION, DIMENSION(SIZE(rhs,1)) :: wr, wz, wp, wq
     DOUBLE PRECISION :: bnrm2, residue, rho0, rho1, alpha, beta
     INTEGER          :: it
 !
     bnrm2 = SQRT(DOT_PRODUCT(rhs,rhs))  ! Euclidian norm of RHS
     it = 0
     wr = rhs-vmx(mat,sol)
 !
 !... Iteration loop  (see fig. 2.5, p.15 of "Templates...")
     DO
        it = it+1
        IF( PRESENT(nssor) ) THEN
           CALL psolve(mat, wz, wr, omega, nssor)
        ELSE
           wz = wr
        END IF
        rho1 = DOT_PRODUCT(wr,wz)
        IF( it .EQ. 1 ) THEN
           wp = wz
        ELSE
           beta = rho1/rho0
           wp = wz + beta*wp
        END IF
        wq = vmx(mat,wp)
        alpha = rho1 / DOT_PRODUCT(wp,wq)
        sol = sol + alpha*wp
        wr = wr - alpha*wq
        residue = SQRT(DOT_PRODUCT(wr,wr)) / bnrm2
        IF( PRESENT(verbose) ) THEN
           IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', it, residue
        END IF
        IF( residue .LE. rtolmx .OR. it .GE. nitmx) EXIT
        rho0 = rho1
     END DO
     IF(PRESENT(resid)) resid = residue
     IF(PRESENT(nit)) nit = it
   END SUBROUTINE cg
 !===========================================================================
   SUBROUTINE psolve(mat, x, b, omega, niter_in)
 !
 !   Preconditionners
 !
     USE cds
     TYPE(cds_mat)                 :: mat
     DOUBLE PRECISION, INTENT(out) :: x(:)
     DOUBLE PRECISION, INTENT(in)  :: b(:)
     DOUBLE PRECISION, INTENT(in)  :: omega
     INTEGER, OPTIONAL, INTENT(in) :: niter_in
 !
     INTEGER          :: niter
     DOUBLE PRECISION :: rtolmx
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !                        1.   No-preconditionning
 !
     IF( omega .LT. 0.0d0 ) THEN
        x = b
        RETURN
     END IF
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !                        2.   SSOR Preconditionner
 !
     niter = 1
     rtolmx = 1.d-6
     IF(PRESENT(niter_in)) THEN
        niter = niter_in
     END IF
     CALL ssor(mat, b, omega, niter, rtolmx, x)
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   END SUBROUTINE psolve
 !
 !===========================================================================
   SUBROUTINE ssor(mat, b, omega, nitmx, rtolmx, x, resid, nit, verbose)
 !
 !   Solve Ax = b using SSOR method
 !
     USE cds
     TYPE(cds_mat)                           :: mat
     DOUBLE PRECISION, INTENT(out)           :: x(:)
     DOUBLE PRECISION, INTENT(in)            :: b(:)
     DOUBLE PRECISION, INTENT(in)            :: omega, rtolmx
     INTEGER, INTENT(in)                     :: nitmx
     DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid
     INTEGER, OPTIONAL, INTENT(out)          :: nit
     LOGICAL, OPTIONAL, INTENT(in)           :: verbose
 !
     INTEGER          :: n, iter
     INTEGER          :: k, i, j, d, bw0, ny
     DOUBLE PRECISION :: omega1, bnrm2, residue
     DOUBLE PRECISION :: rhs(SIZE(x))
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !                            1. Initialization
     n = SIZE(x)
     bw0 = SIZE(mat%rowv)
     ny = mat%ny
     bnrm2 = norm2(b)  ! Euclidian norm of RHS
     omega1 = 1.0d0-omega
     iter = 0
     DO
        iter = iter+1
 !
 !                            2. Forward SOR
 !  Set RHS
        rhs = omega*b  ! rhs <- omega*b
        IF( iter .GT. 1 ) THEN
           rhs = rhs + omega1*x ! rhs <- rhs + (1-omega)*x0
           DO k=1,mat%ku        ! rhs <- rhs - omega*U*x0
              d = mat%dists(k)
              DO i=MAX(1,1-d),MIN(n,n-d)
                 rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d)
              END DO
           END DO
           IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
              rhs(ny) = rhs(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0))
           END IF
        END IF
 !
 !  Solve (1+omega*L) x = rhs
        x = rhs
        IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
           rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny)
        END IF
        DO i=ny+1,n
           DO k=-1,-mat%kl,-1
              d = mat%dists(k)
              j=i+d
              IF( j.LE.0 ) EXIT
              x(i) = x(i) - omega*mat%val(i,k)*x(j)
           END DO
        END DO
 !
 !                            3. Backward SOR
 !  Set RHS
        rhs = omega*b + omega1*x  ! rhs <- omega*b + (1-omega)*x0
        IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
           rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny)
        END IF
        DO k=-mat%kl,-1           ! rhs <- rhs - omega*L*x0
           d = mat%dists(k)
           DO i=MAX(1,1-d),MIN(n,n-d)
              rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d)
           END DO
        END DO
 !
 !  Solve (1+omega*U) x = rhs
        x = rhs
        DO i=n-1,ny+1,-1
           DO k=1,mat%ku
              d = mat%dists(k)
              j = i+d
              IF( j.GT.n ) EXIT
              x(i) = x(i) - omega*mat%val(i,k)*x(j)
           END DO
        END DO
        IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
           x(ny) = x(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0))
        END IF
 !
 !                            4. Compute residue
 !
        IF( PRESENT(resid) ) THEN
           residue = norm2(b-vmx(mat,x)) / bnrm2
           IF(PRESENT(verbose)) THEN
              IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', iter, residue
           END IF
           IF( residue .LT. rtolmx ) EXIT
        END IF
 !
        IF( iter .GE. nitmx ) EXIT
     END DO  ! End of SSOR iterations
     IF(PRESENT(nit))  nit = iter
     IF(PRESENT(resid)) resid = residue
   END SUBROUTINE ssor
 !===========================================================================
   SUBROUTINE diagbal(mat)
 !
 !   Diagonal matrix balancing: store D^(-1/2) in mat%bal
 !
     USE cds
     TYPE(cds_mat)    :: mat
     INTEGER          :: n, bw0, ny, d, i, k
     DOUBLE PRECISION :: diag(mat%rank)
 ! 
     n = mat%rank
     ny = mat%ny
     bw0 = SIZE(mat%colh)
     diag(1:n) = mat%val(1:n,0)
     IF( MINVAL(diag) .LE. 0.0d0 ) THEN
        WRITE(*,'(a)') 'Diagonal elements of matrix are not stricly positive!'
        STOP
     END IF
     diag(1:n) = 1.0d0/SQRT(diag(1:n))
 !
 !     Scale the matrix
 
 !$OMP parallel do private (k,d,i)
     DO k=-mat%kl,mat%ku
        d = mat%dists(k)
        DO i=MAX(1,1-d),MIN(n,n-d)
           mat%val(i,k) = diag(i)*diag(i+d)*mat%val(i,k)
        END DO
     END DO
 !$OMP end parallel do
 !
 !     The ny^th column and row
     IF( ny.NE.0 ) THEN
        mat%rowv(1:bw0) = diag(1:bw0)*diag(ny)*mat%rowv(1:bw0)
        mat%colh(1:bw0) = diag(ny)*diag(1:bw0)*mat%colh(1:bw0)
     END IF
 !
 !     Save D^(-1/2)
     mat%bal(:) = diag(:)
   END SUBROUTINE diagbal
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION norm2_1d(x)
 !
 !  Compute the 2-norm of 1d array
 !
     DOUBLE PRECISION :: x(:)
     DOUBLE PRECISION :: norm2_1d
     norm2_1d = SQRT(SUM(x*x))
   END FUNCTION norm2_1d
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION norm2_2d(x)
 !
 !  Compute the 2-norm of 2d array
 !
     DOUBLE PRECISION :: x(:,:)
     DOUBLE PRECISION :: norm2_2d
     norm2_2d = SQRT(SUM(x*x))
   END FUNCTION norm2_2d
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE tcdsmat_mod
diff --git a/examples/test_kron.f90 b/examples/test_kron.f90
index ee9d5fe..52a5a2c 100644
--- a/examples/test_kron.f90
+++ b/examples/test_kron.f90
@@ -1,125 +1,125 @@
 !>
 !> @file test_kron.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test Kronecker product for both GE and CSR versions.
 !
   USE iso_fortran_env, ONLY : rkind => real64
   USE matrix, ONLY : gemat, init, kron, vmx
   USE csr, ONLY    : csr_mat, init, full_to_csr, kron, vmx
   IMPLICIT NONE
 !
   TYPE(gemat)   :: mata, matb, matc
   TYPE(gemat)   :: matu1
   TYPE(csr_mat) :: mata_csr, matb_csr, matc_csr
   INTEGER, PARAMETER :: m=3, n=2
   REAL(rkind) :: a(m*m)=[ 1.1, 0.0, 0.0, 0.2, 1.5, 1.0, 0.5, 0.0, 1.0 ]
   REAL(rkind) :: b(n*n)=[ 2.0, 1.0, 0.0, 3.0 ]
   REAL(rkind), TARGET :: u(m,n), uu(m,n), v(m,n)
   REAL(rkind), POINTER :: u1d(:), uu1d(:)
   INTEGER :: i, s, e
 !
   CALL init(m, 0, mata)
   CALL init(n, 0, matb)
   mata%val(1:m,1:m) = RESHAPE(a, [m,m])
   matb%val(1:n,1:n) = RESHAPE(b, [n,n])
 !
   CALL printmat_ge('Matrix A', mata)
   CALL printmat_ge('Matrix B', matb)
 !
   u1d(1:m*n) => u   !  u1d = vec(u)
   u1d = [ (REAL(i,rkind), i=1,m*n) ]
   CALL printmat('Array U', u)
 !
 !   Compute (A.U).B^T
 !
   CALL init(n, 0, matu1, mrows=m)
   v = TRANSPOSE(vmx(matb, TRANSPOSE(vmx(mata, u))))
   CALL printmat('(A.U).B^T', v)
 !
 !  Compute (BxA).vec(U)
 !
   CALL kron(matb, mata, matc)
   uu1d(1:m*n) => uu
   uu1d = vmx(matc, u1d)
   CALL printmat_ge('Matrix C', matc)
   CALL printmat('(BxA).vec(U)', uu)
 !----------------------------------------------------------------------
 !
 !   Using CSR matrices
 !
   CALL full_to_csr(mata%val, mata_csr)
   CALL full_to_csr(matb%val, matb_csr)
   CALL printmat_csr('Matrix A', mata_csr)
   CALL printmat_csr('Matrix B', matb_csr)
 !
   CALL kron(matb_csr, mata_csr, matc_csr)
   uu1d = vmx(matc_csr, u1d)
 !
   CALL printmat_csr('Matrix C', matc_csr)
   CALL printmat('(BxA).vec(U)', uu)
 !  
 CONTAINS
   SUBROUTINE printmat(str, a)
     CHARACTER(len=*) :: str
     REAL(rkind), INTENT(in)  :: a(:,:)
     INTEGER :: i,m,n
     WRITE(*,'(/a)')  TRIM(str)
     m=SIZE(a,1)
     n=SIZE(a,2)
     DO i=1,m
        WRITE(*,'(12f8.3)') a(i,:)
     END DO
   END SUBROUTINE printmat
   SUBROUTINE printmat_ge(str, a)
     CHARACTER(len=*) :: str
     TYPE(gemat) :: a
     INTEGER :: i
     WRITE(*,'(/a)')  TRIM(str)
     DO i=1,a%mrows
        WRITE(*,'(12f8.3)') a%val(i,:)
     END DO
   END SUBROUTINE printmat_ge
   SUBROUTINE printmat_csr(str, a)
     CHARACTER(len=*) :: str
     TYPE(csr_mat) :: a
     INTEGER :: i, s, e
     REAL(rkind) :: arow(a%ncols)
     WRITE(*,'(/a,a,3i4)') TRIM(str),  ': m, n, nnz', a%mrows, a%ncols, a%nnz
     DO i=1,a%mrows
        arow = 0.0_rkind
        s = a%irow(i)
        e = a%irow(i+1)-1
        arow(a%cols(s:e)) = a%val(s:e)
        WRITE(*,'(12f8.3)') arow
     END DO
     IF(SIZE(a%idiag) .GT. 0) THEN
        WRITE(*,'(a,(20i4))') 'idiag =', a%idiag
        WRITE(*,'(a,12f8.3)') 'diag = ', a%val(a%idiag)
     END IF
 !
   END SUBROUTINE printmat_csr
 END PROGRAM main
diff --git a/examples/test_pwsmp.f90 b/examples/test_pwsmp.f90
index 0537296..29ebbf1 100644
--- a/examples/test_pwsmp.f90
+++ b/examples/test_pwsmp.f90
@@ -1,287 +1,287 @@
 !>
 !> @file test_pwsmp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !!$  USE futils
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD
   INTEGER :: l, i, lun=99
   INTEGER :: nrank, nnz, s, e, nrank_loc, nnz_loc, nnz_sum
   INTEGER :: istart, iend
   INTEGER, ALLOCATABLE :: irow(:), cols(:)
   INTEGER, ALLOCATABLE :: irow_loc(:), cols_loc(:)
   DOUBLE PRECISION, ALLOCATABLE :: val(:), val_loc(:)
   DOUBLE PRECISION, ALLOCATABLE :: rhs(:), rhs_loc(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol(:), sol_loc(:)
   DOUBLE PRECISION :: mem
   CHARACTER(len=128) :: fname = "mat.dat"
   DOUBLE PRECISION :: mem_loc, mem_min, mem_max
   DOUBLE PRECISION :: err, err_max, err_norm
   DOUBLE PRECISION :: t0, tfact, tsolv
   INTEGER :: it, nits=100
 !
 !  PWSMP vars
 !
   DOUBLE PRECISION :: dparm(64)
   INTEGER          :: iparm(64)
   INTEGER, ALLOCATABLE :: perm(:), invp(:)
 !
   INTEGER :: mrp                     ! just a placeholder in this program
   DOUBLE PRECISION :: aux, diag      ! just placeholders in this program
   INTEGER :: naux=0, nrhs=1
 !===========================================================================
 !                             1.0  Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_size(comm, npes, ierr)
   CALL mpi_comm_rank(comm, me, ierr)
 !===========================================================================
 !                             2.0  Read matrix
 !
 !     File header
   IF( command_argument_count() > 0 ) THEN
      CALL get_command_argument(1, fname, l, ierr)
   END IF
   OPEN(unit=lun, file=fname, form="unformatted")
   READ(lun) nrank, nnz
   IF(me.EQ.0) WRITE(*,'(a,3i16)') 'npes, nrank, nnz', npes, nrank, nnz
 !
 !    Matrix partition
   CALL dist1d(comm, 1, nrank, istart, nrank_loc)
   iend = istart+nrank_loc-1
   WRITE(*,'(a,i3.3,a,2i12)') 'PE', me, ':istart, iend', istart, iend
   ALLOCATE(irow_loc(nrank_loc+1))
 !
 !   Read irow
   ALLOCATE(irow(nrank+1))
   READ(lun) irow
   nnz_loc = irow(iend+1)-irow(istart)
   CALL mpi_reduce(nnz_loc, nnz_sum, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr)
   IF(me.EQ.0) THEN
      PRINT*, 'nnz_sum', nnz_sum
   END IF
   irow_loc(:) = irow(istart:iend+1)   ! Still unshifted
   DEALLOCATE(irow)
 !
   ALLOCATE(cols_loc(nnz_loc))
   ALLOCATE(val_loc(nnz_loc))
   ALLOCATE(rhs_loc(nrank_loc))
   ALLOCATE(sol_loc(nrank_loc))
 !
   s = irow_loc(1)
   e = irow_loc(nrank_loc+1)-1
   irow_loc(:) = irow_loc(:)-s+1     ! Shifted relative irow
   WRITE(*,'(a,i3.3,a,3i12)') 'PE', me, ':s, e, nnz_loc', s, e, nnz_loc
 !
 !   Read cols
   ALLOCATE(cols(nnz))
   READ(lun) cols
   cols_loc(:) = cols(s:e)
   DEALLOCATE(cols)
 !
 !   Read vals
   ALLOCATE(val(nnz))
   READ(lun) val
   val_loc(:) = val(s:e)
   DEALLOCATE(val)
 !
 !   Read RHS
   ALLOCATE(rhs(nrank))
   READ(lun) rhs
   rhs_loc(:) = rhs(istart:iend)
   DEALLOCATE(rhs)
 !
 !!$  mem_loc = mem()
 !!$  CALL minmax_r(mem_loc, comm, 'mem used (MB) after matrix read')
 !===========================================================================
 !                             3.0  Call PWSMP
 !
 !   Initializing of PWSMP.
 !
 !!$  CALL pwsmp_initialize
   ALLOCATE(invp(nrank), perm(nrank))
 !
 !   Fill 'iparm' and 'dparm' arrays with default values.
   iparm(1:3) = 0
   CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
        &       rhs_loc, nrank_loc, nrhs, &
        &       aux, naux, mrp, iparm, dparm)
   IF(iparm(64).NE.0) THEN
      PRINT*, 'WSMP init failed with iparm(64) =', iparm(64)
      CALL mpi_abort(comm, iparm(64), ierr)
   ELSE
      IF(me.EQ.0) PRINT*, 'WSMP init ok'
   END IF
 !
 !    Ordering
   iparm(2) = 1
   iparm(3) = 1
   CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
        &       rhs_loc, nrank_loc, nrhs, &
        &       aux, naux, mrp, iparm, dparm)
   IF(iparm(64).NE.0) THEN
      PRINT*, 'WSMP ordering failed with iparm(64) =', iparm(64)
      CALL mpi_abort(comm, iparm(64), ierr)
   ELSE
      IF(me.EQ.0) PRINT*, 'WSMP ordering ok'
   END IF
 !
 !    Symbolic factorization
   iparm(2) = 2
   iparm(3) = 2
   CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
        &       rhs_loc, nrank_loc, nrhs, &
        &       aux, naux, mrp, iparm, dparm)
   IF(iparm(64).NE.0) THEN
      PRINT*, 'WSMP symbolic failed with iparm(64) =', iparm(64)
      CALL mpi_abort(comm, iparm(64), ierr)
   ELSE
      IF(me.EQ.0) PRINT*, 'WSMP symbolic ok'
   END IF
   IF(me.EQ.0) THEN
      PRINT *,'Number of nonzeros in factor L = 1000 X ',iparm(24)
      PRINT *,'Number of FLOPS in factorization = ',dparm(23)
      PRINT *,'Double words needed to factor on 0 = 1000 X ',iparm(23)
   END IF
 !
 !    Cholesky factorizarion
   iparm(2) = 3
   iparm(3) = 3
   t0 = mpi_wtime()
   CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
        &       rhs_loc, nrank_loc, nrhs, &
        &       aux, naux, mrp, iparm, dparm)
   tfact = mpi_wtime()-t0
   IF(iparm(64).NE.0) THEN
      PRINT*, 'WSMP Choleski failed with iparm(64) =', iparm(64)
      CALL mpi_abort(comm, iparm(64), ierr)
   ELSE
      IF(me.EQ.0) PRINT*, 'WSMP Choleski ok'
   END IF
 !
 !    Backsolve
   t0 = mpi_wtime()
   DO it=1,nits
      sol_loc=rhs_loc
      iparm(2) = 4
      iparm(3) = 4
      CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
           &       sol_loc, nrank_loc, nrhs, &
           &       aux, naux, mrp, iparm, dparm)
   END DO
   rhs_loc=sol_loc
   tsolv = (mpi_wtime()-t0)/REAL(nits,8)
   IF(iparm(64).NE.0) THEN
      PRINT*, 'WSMP backsolve failed with iparm(64) =', iparm(64)
      CALL mpi_abort(comm, iparm(64), ierr)
   ELSE
      IF(me.EQ.0) PRINT*, 'WSMP backsolve ok'
   END IF
 !
 !   Iterative refinement
   iparm(2) = 5
   iparm(3) = 5
   CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
        &       rhs_loc, nrank_loc, nrhs, &
        &       aux, naux, mrp, iparm, dparm)
   IF(iparm(64).NE.0) THEN
      PRINT*, 'WSMP refinement failed with iparm(64) =', iparm(64)
      CALL mpi_abort(comm, iparm(64), ierr)
   ELSE
      IF(me.EQ.0) PRINT*, 'WSMP refinement ok'
   END IF
 !
 !!$  mem_loc = mem()
 !!$  CALL minmax_r(mem_loc, comm, 'mem used (MB) after PWSMP')
 !===========================================================================
 !                             4.0  Check SOL
 !
 !   Read SOL
   ALLOCATE(sol(nrank))
   READ(lun) sol
   sol_loc(:) = sol(istart:iend)
   DEALLOCATE(sol)
   PRINT*, 'Comp. sol', SUM(rhs_loc)
   PRINT*, 'Exact sol', SUM(sol_loc)
 !
   err=MAXVAL(ABS(sol_loc-rhs_loc))
   CALL mpi_reduce(err, err_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
   IF(me.EQ.0) THEN
      PRINT*, 'Max. error', err_max
   END IF
   rhs_loc = rhs_loc-sol_loc
   err = DOT_PRODUCT(rhs_loc,rhs_loc)
   CALL mpi_reduce(err, err_norm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, comm, ierr)
   IF(me.EQ.0) THEN
      PRINT*, 'Norm of error', SQRT(err_norm)
   END IF
 !
 !!$  mem_loc = mem()
 !!$  CALL minmax_r(mem_loc, comm, 'mem used (MB)')
 !===========================================================================
 !                             9.0  Epilogue
 !
   CALL minmax_r(tfact, comm, 'Factorisation time(s)')
   CALL minmax_r(tsolv, comm, '    Backsolve time(s)')
   CALL mpi_finalize(ierr)
 !
 CONTAINS
   SUBROUTINE dist1d(comm, s0, ntot, s, nloc)
 !
 !  1d distribute ntot elements, returns offset s and local number of 
 !  elements nloc.
 ! 
     IMPLICIT NONE
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in) :: s0, ntot
     INTEGER, INTENT(out) :: s, nloc
     INTEGER :: comm, me, npes, ierr, naver, rem
 !
     CALL MPI_COMM_SIZE(comm, npes, ierr)
     CALL MPI_COMM_RANK(comm, me, ierr)
     naver = ntot/npes
     rem = MODULO(ntot,npes)
     s = s0 + MIN(rem,me) + me*naver
     nloc = naver
     IF( me.LT.rem ) nloc = nloc+1
   END SUBROUTINE dist1d
 !
   SUBROUTINE minmax_r(x, comm, str)
     CHARACTER(len=*), INTENT(in) :: str
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in)          :: comm
     INTEGER :: me, ierr
     DOUBLE PRECISION ::  xmin, xmax
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
     CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
     END IF
   END SUBROUTINE minmax_r
 !
 END PROGRAM main
diff --git a/examples/tlocintv.f90 b/examples/tlocintv.f90
index 0208d29..a6970c2 100644
--- a/examples/tlocintv.f90
+++ b/examples/tlocintv.f90
@@ -1,140 +1,140 @@
 !>
 !> @file tlocintv.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Optimization of locintv
 !
   USE bsplines
   IMPLICIT NONE
   INTEGER :: nx, nidbas, ngauss, np, nits
   DOUBLE PRECISION :: a, b, coefs(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
   TYPE(spline1d) :: splx
 !
   INTEGER :: i, nerrs, it
   DOUBLE PRECISION :: t0, t1, seconds, tscal, tscal_new, tvec, tvec_new
   DOUBLE PRECISION, ALLOCATABLE :: xp(:)
   INTEGER, ALLOCATABLE :: left(:)
 !
   INTERFACE
      SUBROUTINE meshdist(coefs, x, nx)
        DOUBLE PRECISION, INTENT(in) :: coefs(5)
        INTEGER, INTENT(iN) :: nx
        DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
      END SUBROUTINE meshdist
   END INTERFACE
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, np, nits, a, b, coefs
 !===========================================================================
 !   Read in data
 !
   nx = 8       ! Number oh intevals in x
   nidbas = 3   ! Degree of splines
   ngauss = 4   ! Number of Gauss points/interval
   np = 10 ! Number of random points in [a,b]
   nits = 1000000
   a = 0.0
   b = 1.0
   coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
                                                  ! see function FDIST in MESHDIST
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x axis
 !
   ALLOCATE(xgrid(0:nx))
   xgrid(0) = a
   xgrid(nx) = b
   CALL meshdist(coefs, xgrid, nx)
   WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx)
   WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
 !
 !   Set up spline
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx)
   WRITE(*,'(a,l1)') 'Is mesh equidistant? ', splx%nlequid
 !
 !   Test locintv
 !
   ALLOCATE(xp(np))
   ALLOCATE(left(np))
   xp(:) = (b-a)*xp(:) + a
   tscal = 0.0d0
   tscal_new = 0.0d0
   tvec = 0.0d0
   tvec_new = 0.0d0
 !
   nerrs = 0
   DO it=1,nits
      CALL RANDOM_NUMBER(xp)
      t0 = seconds()
      DO i=1,np
         CALL locintv_old(splx, xp(i), left(i))
      END DO
      tscal = tscal + seconds()-t0
      nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
 !
      t0 = seconds()
      DO i=1,np
         CALL locintv(splx, xp(i), left(i))
      END DO
      tscal_new = tscal_new + seconds()-t0
      nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
 !
      t0 = seconds()
      CALL locintv_old(splx, xp, left)
      tvec = tvec + seconds()-t0
      nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
 !
      t0 = seconds()
      CALL locintv(splx, xp, left)
      tvec_new = tvec_new + seconds()-t0
      nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
   END DO
   PRINT*, 'nerrs =', nerrs
 !
   tscal = tscal/(REAL(nits*np,8))
   tscal_new = tscal_new/(REAL(nits*np,8))
   tvec = tvec/(REAL(nits*np,8))
   tvec_new = tvec_new/(REAL(nits*np,8))
   WRITE(*,'(4a12)') 'scalar', 'scalar new', 'vector', 'vector new'
   WRITE(*,'(4(1pe12.3))') tscal, tscal_new, tvec, tvec_new
 !
 !   Clean up
 !
   DEALLOCATE(xp)
   DEALLOCATE(left)
   DEALLOCATE(xgrid)
   CALL destroy_sp(splx)
 !
 CONTAINS
   LOGICAL ELEMENTAL FUNCTION in_interv(x, l)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(in) :: l
     in_interv = x.GE.xgrid(l) .AND. x.LT.xgrid(l+1)
   END FUNCTION in_interv  
 END PROGRAM main
diff --git a/examples/tmassmat.f90 b/examples/tmassmat.f90
index 679f965..b284969 100644
--- a/examples/tmassmat.f90
+++ b/examples/tmassmat.f90
@@ -1,189 +1,189 @@
 !>
 !> @file tmassmat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   FT of mass matrix
 !
   USE bsplines
   USE matrix
 !
   IMPLICIT NONE
   INTEGER :: nx, nidbas
   INTEGER :: ngauss, nrank, kl, ku
   INTEGER :: i, k, kmin, kmax
   DOUBLE PRECISION :: pi, xlenght, dx, arg0, arg
   DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), arow(:)
   DOUBLE PRECISION, ALLOCATABLE :: fftmassm1(:), fftmassm2(:), fftmassm3(:)
   DOUBLE PRECISION, ALLOCATABLE :: fftmass_shifted(:)
   TYPE(spline1d) :: splx
   TYPE(periodic_mat) :: massm
 !
   NAMELIST /newrun/ nx, nidbas, xlenght
 !================================================================================
 !              1.0 Prologue
 !
   pi = 4.0d0*ATAN(1.0d0)
 !
   nx = 8
   nidbas = 3
   xlenght = 2.0d0*pi
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
   ngauss = nidbas+1  ! Exact integration for polynomials of degree 2*nidbas
 !
   ALLOCATE(xgrid(0:nx))
   dx = xlenght/REAL(nx)
   xgrid = (/ (i*dx,i=0,nx) /)
   WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
 !
   CALL set_spline(nidbas, ngauss, xgrid, splx, .TRUE.)
 !===========================================================================
 !              2.0  Mass matrix
   nrank = nx
   kl = nidbas
   ku = kl
   CALL init(kl, ku, nrank, 1, massm)
   CALL dismat(splx, massm)
 !
   ALLOCATE(arow(nrank))
 !!$  WRITE(*,'(/a)') 'Mass matrix'
 !!$  DO i=1,nrank
 !!$     CALL getrow(massm, i, arow)
 !!$     WRITE(*,'(10(1pe12.4))') arow
 !!$  END DO
 !===========================================================================
 !              3.0  Fourier transform of Mass matrix
 !
   ALLOCATE(fftmassm1(0:nx-1))
   ALLOCATE(fftmassm2(0:nx-1))
   ALLOCATE(fftmassm3(0:nx-1))
   IF(nidbas.LE.3) THEN
      CALL analytic(nidbas, fftmassm1)
      fftmassm1 = dx*fftmassm1
      WRITE(*,'(/a/(10(1pe12.4)))') 'Analytic', fftmassm1
   END IF
 !
   CALL calc_fftmass_old(splx, fftmassm2)
   WRITE(*,'(/a/(10(1pe12.4)))') 'Old version', fftmassm2
   IF(nidbas.LE.3) THEN
      WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm2-fftmassm1))
   END IF
 !
 !   Init DFT
   kmin = -nx/2
   kmax = nx/2-1
   CALL init_dft(splx, kmin, kmax)
 !
   ALLOCATE(fftmass_shifted(kmin:kmax))
   CALL calc_fftmass(splx, fftmass_shifted)
   DO k=kmin, kmax
      fftmassm3(MODULO(k+nx,nx))=fftmass_shifted(k)
   END DO
   WRITE(*,'(/a/(10(1pe12.4)))') 'New version', fftmassm3
   WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm3-fftmassm2))
 !
 !   Check dftcoefs
   WRITE(*,'(/a)') 'Check DFT of splines'
   PRINT*, 'dims of dftcoefs', SHAPE(splx%dft%coefs)
   DO i=0,nidbas
      WRITE(*,'(a,i3,2(1pe12.4))') 'Sum of coefs for spline', i, &
           &                        SUM(splx%dft%coefs(:,i))
   END DO
 !===========================================================================
 !              9.0  Clean up
 !
   DEALLOCATE(xgrid)
   DEALLOCATE(fftmassm1)
   DEALLOCATE(fftmassm2)
   DEALLOCATE(fftmassm3)
   DEALLOCATE(fftmass_shifted)
   DEALLOCATE(arow)
   CALL destroy(massm)
   CALL destroy_sp(splx)
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
     TYPE(spline1d) :: spl
     TYPE(periodic_mat) :: mat
     INTEGER :: dim, nx, nidbas, ngauss
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:)
     INTEGER :: i, igauss, iw, jt, irow, jcol
     DOUBLE PRECISION :: contrib
 !
     CALL get_dim(spl, dim, nx, nidbas)
     ALLOCATE(fun(0:nidbas,1)) ! Spline
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
 !
     DO i=1,nx
        CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
        DO igauss=1,ngauss
           CALL basfun(xgauss(igauss), spl, fun, i)
           DO jt=0,nidbas
              DO iw=0,nidbas
                 contrib = fun(jt,1) * fun(iw,1) * wgauss(igauss)
                 irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
                 jcol=MODULO(i+jt-1,nx) + 1
                 CALL updtmat(mat, irow, jcol, contrib)
              END DO
           END DO
        END DO
     END DO
 !
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE analytic(nidbas, mat)
 !
 !   Analytic form for nidbas .le. 3
 !
     INTEGER, INTENT(in) :: nidbas
     DOUBLE PRECISION, INTENT(out) :: mat(0:)
     DOUBLE PRECISION :: arg0, arg, cosk
     INTEGER :: n, k
 !
     n = SIZE(mat)
     arg0 = 2.0d0*pi/REAL(n,8)
     DO k=0,n-1
        arg = k*arg0
        cosk = COS(arg)
        SELECT CASE (nidbas)
        CASE (1)
           mat(k) = (2.0d0 + cosk)/3.0d0
        CASE (2)
           mat(k) = (16.0d0 + cosk*(13.0d0+cosk))/30.0d0
        CASE (3)
           mat(k) = (272.0d0 + cosk*(297.0d0+cosk*(60.0d0+cosk)))/630.0d0
        CASE default
           WRITE(*,'(a,i4,a)') 'ANALYTIC: nidbas =', nidbas, ' is not implemented!'
           STOP
        END SELECT
     END DO
   END SUBROUTINE analytic
 END PROGRAM main
diff --git a/examples/tmatrix_gb.f90 b/examples/tmatrix_gb.f90
index 6183335..106c0b7 100644
--- a/examples/tmatrix_gb.f90
+++ b/examples/tmatrix_gb.f90
@@ -1,114 +1,114 @@
 !>
 !> @file tmatrix_gb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test some routines of module matrix
 !
   USE matrix
   IMPLICIT NONE
   TYPE(gbmat) :: mata
   INTEGER, PARAMETER :: n=5, ku=3, kl=3
   DOUBLE PRECISION :: arr(n), fulla(n,n), base
   DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
   INTEGER :: i, j, iaway, pow
   CHARACTER(len=32) :: str
 !
   CALL init(ku, ku, n, 0, mata)
   CALL getvalp(mata, p)
   PRINT*, 'shape of A: ', SHAPE(p)
 !
 !   Test updtmat
   p = 0.0d0
   DO j=1,n
      DO i=1,n
         arr(i) = 10*i + j
         IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(i))
      END DO
   END DO
   CALL prntmat('Test of UPDTMAT', p)
 !
 !   Test PUTCOL
   p = 0.0d0
   DO j=1,n
      DO i=1,n
         arr(i) = 10*i + j
      END DO
      CALL putcol(mata, j, arr)
   END DO
   CALL prntmat('Test of PUTCOL', p)
 !
 !
 !   Test PUTROW
   p = 0.0d0
   DO i=1,n
      DO j=1,n
         arr(j) = 10*i + j
      END DO
      CALL putrow(mata, i, arr)
   END DO
   CALL prntmat('Test of PUTROW', p)
 !
 !   Test GETCOL
   fulla = 0.0
   DO j=1,n
      CALL getcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Test of GETCOL', fulla)
 !
   iaway=4
   arr = 0.0d0
   arr(iaway) =1.0
   CALL putrow(mata, iaway, arr)
   CALL putcol(mata, iaway, arr)
   WRITE(str,'(a,i3)') 'Away on i = j =',iaway
   CALL prntmat(TRIM(str), p)
 !
 !   Test GETCOL
   fulla = 0.0
   DO j=1,n
      CALL getcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Matrix full', fulla)
 !
 !   Test of determinant
   CALL determinant(mata, base, pow)
   CALL prntmat('Factored A (gb)', p)
   PRINT*, 'Prod. of factored A diagnonals', PRODUCT(p(kl+ku+1,:))
   WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow
   PRINT*, 'Pivots ', mata%piv
 !
   call destroy(mata)
 CONTAINS
   SUBROUTINE prntmat(str, a)
     DOUBLE PRECISION, DIMENSION(:,:) :: a
     CHARACTER(len=*) :: str
     INTEGER :: i
     WRITE(*,'(a)') TRIM(str)
     DO i=1,SIZE(a,1)
        WRITE(*,'(10f8.1)') a(i,:)
     END DO
   END SUBROUTINE prntmat
 END PROGRAM main
diff --git a/examples/tmatrix_pb.f90 b/examples/tmatrix_pb.f90
index eae0fd4..01d7f92 100644
--- a/examples/tmatrix_pb.f90
+++ b/examples/tmatrix_pb.f90
@@ -1,143 +1,143 @@
 !>
 !> @file tmatrix_pb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test some routines of module matrix
 !
   USE matrix
   IMPLICIT NONE
   TYPE(pbmat) :: mata, matb
   INTEGER, PARAMETER :: n=5, ku=3
   DOUBLE PRECISION :: arr(n), fulla(n,n), fullb(n,n), base
   DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p, pb
   INTEGER :: i, j, info, pow
 !
   CALL init(ku, n, 0, mata)
   CALL init(1, n, 0, matb)
   CALL getvalp(mata, p)
   CALL getvalp(matb, pb)
   PRINT*, 'shape of A: ', SHAPE(p)
 !
 !   Test updtmat
   p = 0.0d0
   DO i=1,n
      DO j=i,n
         arr(j) = 10*i + j
         IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j))
      END DO
   END DO
   CALL prntmat('Test of UPDTMAT', p)
 !
 !   Test GETCOL
   fulla = 0.0
   DO j=1,n
      CALL getcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Full matrix from GETCOL', fulla)
 !
 !   Test GETROW
   fulla = 0.0
   DO i=1,n
      CALL getrow(mata, i, fulla(i,:))
   END DO
   CALL prntmat('Full matrix from GETROW', fulla)
 !
 !   Test PUTCOL
   p = 0.0d0
   DO j=1,n
      DO i=1,n
         arr(i) = 10*i + j
      END DO
      CALL putcol(mata, j, arr)
   END DO
   CALL prntmat('Test of PUTCOL', p)
 !
 !   Test PUTROW
   p = 0.0d0
   DO i=1,n
      DO j=1,n
         arr(j) = 10*i + j
      END DO
      CALL putrow(mata, i, arr)
   END DO
   CALL prntmat('Test of PUTROW', p)
 !
   arr = 0.0d0
   arr(2) =1.0
   CALL putrow(mata, 2, arr)
   CALL prntmat('Away on i=2, j=2', p)
 !
 !   Test GETCOL
   fulla = 0.0
   DO j=1,n
      CALL getcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Full matrix from GETCOL', fulla)
 !
 !   Test GETROW
   fulla = 0.0
   DO i=1,n
      CALL getrow(mata, i, fulla(i,:))
   END DO
   CALL prntmat('Full matrix from GETROW', fulla)
 !
 !  Test GETELE
   fulla = 0.0
   DO i=1,n
      DO j=1,n
         IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j))
      END DO
   END DO
   CALL prntmat('Full matrix from GETELE', fulla)
 !
 !   Test of determinant
   fullb = 0.0
   DO i=1,n
      fullb(i,i) = 2.0d0
      IF(i.LT.n) fullb(i,i+1)=-1.0d0
      IF(i.GT.1) fullb(i,i-1)=-1.0d0
   END DO
   DO j=1,n
      CALL putcol(matb, j, fullb(:,j))
   END DO
   CALL prntmat('Mat. A (full)', fullb)
   CALL prntmat('Mat. A (pb)', pb)
   CALL determinant(matb, base, pow)
   WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow
 !
   CALL destroy(mata)
   CALL destroy(matb)
 CONTAINS
   SUBROUTINE prntmat(str, a)
     DOUBLE PRECISION, DIMENSION(:,:) :: a
     CHARACTER(len=*) :: str
     INTEGER :: i
     WRITE(*,'(a)') TRIM(str)
     DO i=1,SIZE(a,1)
        WRITE(*,'(10f8.1)') a(i,:)
     END DO
   END SUBROUTINE prntmat
 END PROGRAM main
diff --git a/examples/tmatrix_zpb.f90 b/examples/tmatrix_zpb.f90
index ba7c30d..2340d0b 100644
--- a/examples/tmatrix_zpb.f90
+++ b/examples/tmatrix_zpb.f90
@@ -1,137 +1,137 @@
 !>
 !> @file tmatrix_zpb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test some routines of module matrix
 !
   USE matrix
   IMPLICIT NONE
   TYPE(zpbmat) :: mata, matb
   INTEGER, PARAMETER :: n=5, ku=3
   DOUBLE COMPLEX :: arr(n), fulla(n,n), fullb(n,n), base
   DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p, pb
   INTEGER :: i, j, pow
 !
   CALL init(ku, n, 0, mata)
   CALL init(1, n, 0, matb)
   CALL getvalp(mata, p)
   CALL getvalp(matb, pb)
   PRINT*, 'shape of A: ', SHAPE(p)
 !
 !   Test updtmat
   p = 0.0d0
   DO i=1,n
      DO j=i,n
         arr(j) = CMPLX(10*i + j, j-i)
         IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j))
      END DO
   END DO
   CALL prntmat('Test of UPDTMAT', p)
 !
 !   Test GETCOL
   fulla = 0.0
   DO j=1,n
      CALL getcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Full matrix from GETCOL', fulla)
 !
 !   Test GETROW
   fulla = 0.0
   DO i=1,n
      CALL getrow(mata, i, fulla(i,:))
   END DO
   CALL prntmat('Full matrix from GETROW', fulla)
 !
 !   Test PUTCOL
   p = 0.0d0
   DO j=1,n
      CALL putcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Test of PUTCOL', p)
 !
 !   Test PUTROW
   p = 0.0d0
   DO i=1,n
      CALL putrow(mata, i, fulla(i,:))
   END DO
   CALL prntmat('Test of PUTROW', p)
 !
   arr = 0.0d0
   arr(2) =1.0
   CALL putrow(mata, 2, arr)
   CALL prntmat('Away on i=2, j=2', p)
 !
 !   Test GETCOL
   fulla = 0.0
   DO j=1,n
      CALL getcol(mata, j, fulla(:,j))
   END DO
   CALL prntmat('Full matrix from GETCOL', fulla)
 !
 !   Test GETROW
   fulla = 0.0
   DO i=1,n
      CALL getrow(mata, i, fulla(i,:))
   END DO
   CALL prntmat('Full matrix from GETROW', fulla)
 !
 !  Test GETELE
   fulla = 0.0
   DO i=1,n
      DO j=1,n
         IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j))
      END DO
   END DO
   CALL prntmat('Full matrix from GETELE', fulla)
 !
 !   Test of determinant
   fullb = 0.0
   DO i=1,n
      fullb(i,i) = 2.0d0
      IF(i.LT.n) fullb(i,i+1)=-1.0d0
      IF(i.GT.1) fullb(i,i-1)=-1.0d0
   END DO
   DO j=1,n
      CALL putcol(matb, j, fullb(:,j))
   END DO
   CALL prntmat('Mat. A (full)', fullb)
   CALL prntmat('Mat. A (pb)', pb)
   CALL determinant(matb, base, pow)
   WRITE(*,'(a,2f8.5,i3)') 'Determinant(base,power) = ', base, pow
 !
   CALL destroy(mata)
   CALL destroy(matb)
 CONTAINS
   SUBROUTINE prntmat(str, a)
     DOUBLE COMPLEX, DIMENSION(:,:) :: a
     CHARACTER(len=*) :: str
     INTEGER :: i
     WRITE(*,'(a)') TRIM(str)
     DO i=1,SIZE(a,1)
        WRITE(*,'(5(5x,"(",f5.1,",",f5.1,")"))') a(i,:)
     END DO
   END SUBROUTINE prntmat
 END PROGRAM main
diff --git a/examples/tp2p_mat.f90 b/examples/tp2p_mat.f90
index d5084fb..3a6b318 100644
--- a/examples/tp2p_mat.f90
+++ b/examples/tp2p_mat.f90
@@ -1,108 +1,108 @@
 !>
 !> @file tp2p_mat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
   USE pardiso_bsplines
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   INTEGER :: me, npes, ierr
   INTEGER :: next
   INTEGER :: i, j, rank
   DOUBLE PRECISION :: val
   DOUBLE PRECISION, ALLOCATABLE :: arow(:)
   TYPE(pardiso_mat) :: mat
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   IF(npes.NE.2) THEN
      PRINT*, 'Should run with 2 procs!'
      CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
   END IF
 !
 !   Define local matrix
 !
   rank = npes
   CALL init(npes, 0, mat)
   DO i=1,rank  ! Fill row me+1
      val = me+1
      j = me+1
      CALL updtmat(mat, i, me+1, val)
   END DO
 !
 !  Exchange matrix 
 !
   CALL disp_mat('Original matrix')
   next=MODULO(me+1,2)
 !
   IF(me.EQ.0) THEN
      CALL p2p_mat(mat, 1, 'send', 'put', MPI_COMM_WORLD)
   ELSE
      CALL p2p_mat(mat, 0, 'recv', 'put', MPI_COMM_WORLD)
   END IF
   CALL disp_mat('Matrix after 0->1/put')
 !
   CALL p2p_mat(mat, next, 'sendrecv', 'put', MPI_COMM_WORLD)
   CALL disp_mat('Matrix after sendrev/put')
 !
   CALL p2p_mat(mat, next, 'sendrecv', 'updt', MPI_COMM_WORLD)
   CALL disp_mat('Matrix after sendrev/updt')
 !
   IF(me.EQ.1) THEN
      CALL p2p_mat(mat, 0, 'send', 'updt', MPI_COMM_WORLD)
   ELSE
      CALL p2p_mat(mat, 1, 'recv', 'updt', MPI_COMM_WORLD)
   END IF
   CALL disp_mat('Matrix after 1->0/updt')
 !
   IF(me.EQ.1) THEN
      CALL p2p_mat(mat, 0, 'send', 'put', MPI_COMM_WORLD)
   ELSE
      CALL p2p_mat(mat, 1, 'recv', 'put', MPI_COMM_WORLD)
   END IF
   CALL disp_mat('Matrix after 1->0/put')
 !
   CALL mpi_finalize(ierr)
 CONTAINS
   SUBROUTINE disp_mat(str)
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER :: p
     DO p=0,npes-1
        IF(me.EQ.p) THEN
           WRITE(*,'(a,i3.3)') str//' on PE', me
           CALL to_mat(mat, nlkeep=.TRUE.)
           ALLOCATE(arow(mat%rank))
           DO i=1,mat%rank
              CALL getrow(mat, i, arow)
              WRITE(*,'(10f8.2)') arow
           END DO
           DEALLOCATE(arow)
           CALL FLUSH(6)
        END IF
        CALL mpi_barrier(MPI_COMM_WORLD, ierr)
     END DO
   END SUBROUTINE disp_mat
 END PROGRAM main
diff --git a/examples/tpsum_mat.f90 b/examples/tpsum_mat.f90
index 5f01441..30d30f9 100644
--- a/examples/tpsum_mat.f90
+++ b/examples/tpsum_mat.f90
@@ -1,77 +1,77 @@
 !>
 !> @file tpsum_mat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !!$  USE pardiso_bsplines
 !!$  USE wsmp_bsplines
   USE mumps_bsplines
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   INTEGER :: me, npes, ierr
   INTEGER :: i, j, rank
   DOUBLE PRECISION :: val
   DOUBLE PRECISION, ALLOCATABLE :: arow(:)
 !!$  TYPE(pardiso_mat) :: mat
 !!$  TYPE(wsmp_mat) :: mat
   TYPE(mumps_mat) :: mat
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
   rank = npes
   CALL init(npes, 1, mat)
   DO i=1,rank  ! Fill row me+1
      val = me+1
      j = me+1
      CALL updtmat(mat, i, me+1, val)
   END DO
 !
 !!$  CALL disp_mat('Original matrix')
   CALL psum_mat(mat, MPI_COMM_WORLD)
   CALL disp_mat('Global sum of matrix')
 !
   CALL mpi_finalize(ierr)
 CONTAINS
   SUBROUTINE disp_mat(str)
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER :: p
     DO p=0,npes-1
        IF(me.EQ.p) THEN
           CALL to_mat(mat, nlkeep=.TRUE.)
           WRITE(*,'(a,i3.3,a,2i6)') str//' on PE', me, ': rank, nnz', mat%rank, mat%nnz
           ALLOCATE(arow(mat%rank))
           DO i=1,mat%rank
              CALL getrow(mat, i, arow)
              WRITE(*,'(10f8.2)') arow
           END DO
           DEALLOCATE(arow)
           CALL FLUSH(6)
        END IF
        CALL mpi_barrier(MPI_COMM_WORLD, ierr)
     END DO
   END SUBROUTINE disp_mat
 END PROGRAM main
diff --git a/examples/tsparse1.f90 b/examples/tsparse1.f90
index c22a07f..eb3e44c 100644
--- a/examples/tsparse1.f90
+++ b/examples/tsparse1.f90
@@ -1,117 +1,117 @@
 !>
 !> @file tsparse1.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MAIN !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 PROGRAM main
   USE sparse
   IMPLICIT none
 !  
   TYPE(zspmat) :: amat
   TYPE(zsprow) :: arow
 !
   LOGICAL          :: found
   INTEGER          :: n=10
   INTEGER          :: jcol, nnz, i
   DOUBLE PRECISION :: valr, vali
   DOUBLE COMPLEX :: val, zero=(0.,0.)
   DOUBLE COMPLEX, ALLOCATABLE :: arr(:), farr(:)
   INTEGER, ALLOCATABLE :: col(:), newcol(:)
 !
 !   Initialize the sparse matrix amat
 !
   CALL init(n, amat)
 !
 !   Use UPDT_SP to update a sparse row
 !
   WRITE(*,*) 'Enter a list of positive indices, terminate with a zero'
   DO
      READ(*,*) jcol
      IF(jcol .LE. 0) EXIT
      CALL RANDOM_NUMBER(valr)
      vali = jcol
      val = CMPLX(valr, vali)
      CALL updtmat(arow, jcol, val)
   END DO
 !
 !   Convert a sparse row to a sequential row
 !
   nnz = arow%nnz
   WRITE(*,'(a,i5)') 'nnz =', nnz
 !
   ALLOCATE(arr(nnz), col(nnz), newcol(nnz))
   CALL getrow(arow, arr, col)
   WRITE(*, '(a/(10i8))') 'col', col
   WRITE(*, '(a/(10f8.4))') 'arr', arr
 !
   ALLOCATE(farr(MAXVAL(col)))
   CALL getrow(arow, farr)
   WRITE(*, '(/a/(10f8.4))') 'farr', farr
 !
 !   Clear element by element of row
 !
   DO i=1,nnz
      CALL putele(arow, col(i), zero)
      CALL getrow(arow, arr, newcol)
      WRITE(*, '(/a,i6/(10i8))') 'col', arow%nnz, newcol(1:arow%nnz)
   END DO
 !
 !   Re-create row using PUTROW and full row
 !
   CALL putrow(arow, farr)
   CALL getrow(arow, arr, col)
   WRITE(*,'(/a,i5)') 'nnz =', arow%nnz
   WRITE(*, '(a/(10i8))') 'col', col
   WRITE(*, '(a/(10f8.4))') 'arr', arr
 !
 !   Clear row using DESTROY
 !
   CALL destroy(arow)
   CALL getrow(arow, arr, newcol)
   nnz = arow%nnz
   WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz)
 !
 !   Re-create row using PUTROW and sparse row
 !
   CALL putrow(arow, arr, col)
   CALL getrow(arow, arr, newcol, nnz)
   WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz)
   WRITE(*, '(a/(10f8.4))') 'arr', arr
 !
 !   Test GETELE
 !
   i=111;val=0
   CALL getele(arow, i, val, found)
   WRITE(*,'(/i8,2f8.4,l3)') i, val, found
   DO i=1,nnz
      CALL getele(arow, col(i), val, found)
      WRITE(*,'(i8,2f8.4,l3)') col(i), val, found
   END DO
 !
 !   Test destroy_spmat
 !
   CALL destroy(amat)
 END PROGRAM main
diff --git a/examples/tsparse2.f90 b/examples/tsparse2.f90
index 23d3fcf..98efd21 100644
--- a/examples/tsparse2.f90
+++ b/examples/tsparse2.f90
@@ -1,151 +1,151 @@
 !>
 !> @file tsparse2.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Simple 2D Poisson using 5 points FD
 !
   USE pardiso_bsplines
   IMPLICIT NONE
 !
   TYPE(pardiso_mat) :: amat, bmat
   DOUBLE PRECISION, ALLOCATABLE :: arow(:), rhs(:), sol(:)
   DOUBLE PRECISION, ALLOCATABLE :: arown(:,:), rhsn(:,:), soln(:,:)
   INTEGER :: nx=5, ny=4, n
   INTEGER :: i, j, irow, jcol
 !
   DOUBLE PRECISION   :: mem, seconds
 !
   n = nx*ny
   CALL init(n, 1, amat)                ! Non-symmetric matrix
   CALL init(n, 1, bmat, nlsym=.TRUE.)  ! Symmetric matrix
   ALLOCATE(arow(n), arown(n,2))
   ALLOCATE(rhs(n), rhsn(n,2))
   ALLOCATE(sol(n), soln(n,2))
 !
 !  Construct the FD matrix amat, using sparse rows (linked lists)
 !
   DO j=1,ny
      DO i=1,nx
         arow = 0.0d0
         irow = numb(i,j)
         arow(irow) = 4.0d0
         IF(i.GT.1)  arow(numb(i-1,j)) = -1.0d0
         IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
         IF(j.GT.1)  arow(numb(i,j-1)) = -1.0d0
         IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
         rhs(irow) = SUM(arow)
         CALL putrow(amat, irow, arow) ! General matrix
         CALL putrow(bmat, irow, arow) ! Symmetric matrix
      END DO
   END DO
 !
 !   Print the matrices
 !
   WRITE(*,'(/a)') 'Matrix A'
   DO i=1,n
      CALL getrow(amat, i, arow)
      WRITE(*,'(30f4.0)') arow
   END DO
   PRINT*, 'nnz from get_count', get_count(amat)
 !
   WRITE(*,'(/a)') 'Matrix B'
   DO i=1,n
      CALL getrow(bmat, i, arow)
      WRITE(*,'(30f4.0)') arow
   END DO
   PRINT*, 'nnz from get_count', get_count(bmat)
 !
 !   Factor the matrix using Pardiso
 !
   CALL factor(amat, nlmetis=.TRUE.)
   WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of A  = ',amat%p%iparm(18)
   WRITE(*,'(a,i5)')  'Number of factorization MFLOPS      = ',amat%p%iparm(19)
 !
   CALL factor(bmat, nlmetis=.TRUE.)
   WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of B  = ',bmat%p%iparm(18)
   WRITE(*,'(a,i5)')  'Number of factorization MFLOPS      = ',bmat%p%iparm(19)
 !
   WRITE(*,'(/a/(10f8.4))') 'rhs', rhs
 !
 !   Backsolve Ax = b, using Pardiso
 !
   sol = rhs
   CALL bsolve(amat, sol, debug=.FALSE.)
   WRITE(*,'(/a/(10f8.4))') 'sol (non-sym)', sol
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
 !
 !   Backsolve Bx = b, using Pardiso
 !
   sol = rhs
   CALL bsolve(bmat, sol, debug=.FALSE.)
   WRITE(*,'(/a/(10f8.4))') 'sol (sym)', sol
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
 !
   arow = vmx(amat, sol)
   WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arow
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs))
 !
   arow = vmx(bmat, sol)
   WRITE(*,'(/a/(10f8.4))') 'B*x (sym)', arow
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs))
 !
 !   Multiple RHS
 !
   rhsn(:,1) = -rhs(:)
   rhsn(:,2) = 2.d0*rhs(:)
   CALL bsolve(amat, rhsn, soln)
   WRITE(*,'(/a/(10f8.4))') 'soln (non-sym)', soln
   WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), &
        &                  MAXVAL(ABS(soln(:,2)-2.0d0))
 !
   CALL bsolve(bmat, rhsn, soln)
   WRITE(*,'(/a/(10f8.4))') 'soln (sym)', soln
   WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), &
        &                  MAXVAL(ABS(soln(:,2)-2.0d0))
 !
   arown = vmx(amat, soln)
   WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arown
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn))
 !
   arown = vmx(bmat, soln)
   WRITE(*,'(/a/(10f8.4))') 'A*x (sym)', arown
   WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn))
 !
 !   Clean up
 !
   DEALLOCATE(arow,arown)
   DEALLOCATE(rhs,rhsn)
   DEALLOCATE(sol,soln)
   CALL destroy(amat)
   CALL destroy(bmat)
 CONTAINS
   INTEGER FUNCTION numb(i,j)
     INTEGER, INTENT(in) :: i, j
     numb = (j-1)*nx + i
   END FUNCTION numb
 !
 END PROGRAM main
 
diff --git a/examples/zpardiso_ex1.f b/examples/zpardiso_ex1.f
index 7979d88..33027c6 100644
--- a/examples/zpardiso_ex1.f
+++ b/examples/zpardiso_ex1.f
@@ -1,96 +1,96 @@
 !>
 !> @file zpardiso_ex1.f
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
       PROGRAM main
       USE wsmp_bsplines
       USE pardiso_bsplines
       IMPLICIT NONE
 c
       INTEGER :: n=9
       INTEGER ia(10)
       INTEGER ja(29)
       COMPLEX*16 avals(29)
       COMPLEX*16 b(9), sol(9), arow(9)
 c
 c$$$      type(zwsmp_mat) :: mat
       type(zpardiso_mat) :: mat
       integer :: i, k
 c
       DATA ia /1,5,9,13,17,21,25,27,29,30/
       data ja 
      1        /1,          3,                      7,    8,
      2	             2,    3,                            8,    9,
      3                     3,                      7,    8,    9,
      4                           4,          6,    7,    8,
      5                                 5,    6,          8,    9,
      6                                       6,    7,    8,    9,
      7                                             7,    8,
      8                                                   8,    9,
      9                                                         9/
       data avals 
      1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
      2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
      3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
      4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
      5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
      6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
      7 (16.d0,0.d0), (-4.d0,-2.d-1),
      8 (71.d0,0.d0), (-4.d0,-2.d-1),
      9 (16.d0,0.d0)/ 
 c
 c$$$      call init(n, 1, mat, nlherm=.false., nlsym=.true., nlpos=.true.)
       call init(n, 1, mat, nlherm=.true., nlpos=.true.)
       do i=1,n
          do k=ia(i),ia(i+1)-1
             call putele(mat, i, ja(k), avals(k))
          end do
       end do
 c
       call factor(mat)
 c
       print*, 'diff of val', cnorm2(avals-mat%val)
       print*, 'diff of ia', ia-mat%irow
       print*,' diff ja', ja-mat%cols
 c
       print*, 'The RHS:'
       do i = 1, n
          call getrow(mat,i, arow)
          b(i) = sum(arow)
          print *,i,' : ',b(i)
       end do
       call bsolve(mat,b,sol)
       print *,'The solution of the system is as follows:'
       do i = 1, n
          print *,i,' : ',sol(i)
       end do
       print*, 'Residue =', cnorm2(vmx(mat,sol)-b)
       contains
       FUNCTION cnorm2(x)
       DOUBLE COMPLEX, INTENT(in) :: x(:)
       DOUBLE PRECISION :: cnorm2
       cnorm2 = SQRT(DOT_PRODUCT(x,x))    
       END FUNCTION cnorm2
 c
       END PROGRAM main
diff --git a/examples/zssmp_ex1.f b/examples/zssmp_ex1.f
index cae1c08..dbe84fc 100644
--- a/examples/zssmp_ex1.f
+++ b/examples/zssmp_ex1.f
@@ -1,111 +1,111 @@
 !>
 !> @file zssmp_ex1.f
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
       PROGRAM main
       USE wsmp_bsplines
       USE pardiso_bsplines
       IMPLICIT NONE
 c
       INTEGER :: n=9
       INTEGER ia(10)
       INTEGER ja(29)
       COMPLEX*16 avals(29)
       COMPLEX*16 b(9), sol(9), arow(9)
 c
       type(zwsmp_mat) :: mat
       integer :: i, k
 c
       DATA ia /1,5,9,13,17,21,25,27,29,30/
       data ja 
      1        /1,          3,                      7,    8,
      2	             2,    3,                            8,    9,
      3                     3,                      7,    8,    9,
      4                           4,          6,    7,    8,
      5                                 5,    6,          8,    9,
      6                                       6,    7,    8,    9,
      7                                             7,    8,
      8                                                   8,    9,
      9                                                         9/
       data avals 
      1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
      2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
      3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
      4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
      5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
      6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
      7 (16.d0,0.d0), (-4.d0,-2.d-1),
      8 (71.d0,0.d0), (-4.d0,-2.d-1),
      9 (16.d0,0.d0)/ 
 c
       call init(n, 1, mat, nlherm=.true., nlpos=.true.)
       do i=1,n
          do k=ia(i),ia(i+1)-1
             call putele(mat, i, ja(k), avals(k))
          end do
       end do
 c
       print*, 'The RHS before tomat:'
       do i = 1, n
          call getrow(mat,i, arow)
          b(i) = sum(arow)
          print *,i,' : ',b(i)
       end do
 c
       call factor(mat)
 c
       write(*,'(a/(20f6.2))') 'avals', avals
       write(*,'(a/(20f6.2))') 'mat%val', mat%val
       print*, 'diff of val', cnorm2(avals-mat%val)
       print*, 'diff of ia', ia-mat%irow
       print*,' diff ja', ja-mat%cols
 c
       print*, 'Check getrow'
       do i = 1, n
          call getrow(mat,i, arow)
          write(*,'(i3,": ",(20f6.2))') i, arow(i:n)
       end do
 c
       print*, 'The RHS:'
       do i = 1, n
          call getrow(mat,i, arow)
          b(i) = sum(arow)
          print *,i,' : ',b(i)
       end do
       call bsolve(mat,b,sol)
       print *,'Norm of Residual = ',mat%p%dparm(7)
       print *,'The solution of the system is as follows:'
       do i = 1, n
          print *,i,' : ',sol(i)
       end do
       print*, 'Residue =', cnorm2(vmx(mat,sol)-b)
 c
       contains
       FUNCTION cnorm2(x)
       DOUBLE COMPLEX, INTENT(in) :: x(:)
       DOUBLE PRECISION :: cnorm2
       cnorm2 = SQRT(DOT_PRODUCT(x,x))    
       END FUNCTION cnorm2
 c
       END PROGRAM main
diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt
index 82f6ec0..1b099c2 100644
--- a/fft/CMakeLists.txt
+++ b/fft/CMakeLists.txt
@@ -1,69 +1,69 @@
 #
 # @file CMakeLists.txt
 #
 # @brief Principal CMake configuration file for the fft library
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 project(fft)
 
 find_package(FFTW REQUIRED)
 set(fft_w $ENV{fft_w})
 
 set(SRCS
   fft_fftw.F90
 )
 
 set(EXAMPLES tfft.f90)
 
 add_library(fft STATIC ${SRCS})
 
 target_include_directories(fft
   PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
           ${FFTW_INCLUDES}
   INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
             $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
             ${FFTW_INCLUDES}
   )
 
 if (${fft_w} MATCHES "fft_w2")
   target_compile_options(fft PRIVATE "-Dfft_w2")
 else()
   target_compile_options(fft PRIVATE "-Dfft_w3")
 endif()
 
 target_link_libraries(fft PUBLIC ${FFTW_LIBRARY} ${MPI_Fortran_LIBRARIES})
 #
 set_property(TARGET fft
   PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/fft.mod)
 
 add_executable(tfft tfft.f90)
 target_link_libraries(tfft fft ${MPI_Fortran_LIBRARIES})
 
 #add_test(tfft ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
 #  ${CMAKE_CURRENT_BINARY_DIR}/tfft < ${fft_SOURCE_DIR}/in)
 
 install(TARGETS fft
   EXPORT ${BSPLINES_EXPORT_TARGETS}
   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
   PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
diff --git a/fft/Makefile b/fft/Makefile
index bb7a818..895fdb9 100644
--- a/fft/Makefile
+++ b/fft/Makefile
@@ -1,68 +1,68 @@
 #
 # @file Makefile
 #
 # @brief Makefile for the fft library
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 FFTW=$(FFTW_HOME)
 F90 = ifort
 LD = ifort
 
 debug = -g -traceback -CB
 optim = -O3
 
 OPT=$(debug)
 #OPT=$(optim)
 
 F90FLAGS = $(OPT) -Dfft_w3 -I$(FFTW)/include
 LDFLAGS = $(OPT) -L. -L$(FFTW)/lib
 
 LIBS = -lfft -lfftw3
 
 .SUFFIXES:
 .SUFFIXES: .o .c .F90 .f90 .f
 
 .f90.o:
 	$(F90) $(F90FLAGS) -c $<
 .F90.o:
 	$(F90) $(F90FLAGS) -c $<
 .f.o:
 	$(F90) $(F90FLAGS) -c $<
 
 all:	tfft
 
 lib:    libfft.a
 
 libfft.a: fft_fftw.o
 	xiar r $@ $?
 	ranlib $@
 
 tfft:	tfft.o
 	$(LD) $(LDFLAGS) -o $@ tfft.o fft_fftw.o $(LIBS)
 
 tfft.o:	lib
 
 clean:
 	rm -f *.o *.mod *~ a.out
 
 distclean: clean
 	rm -f tfft
diff --git a/fft/fft_fftw.F90 b/fft/fft_fftw.F90
index ff9e7ca..cd404a8 100644
--- a/fft/fft_fftw.F90
+++ b/fft/fft_fftw.F90
@@ -1,1376 +1,1376 @@
 !>
 !> @file fft_fftw.F90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 #if defined(fft_w2)
 MODULE fft
 !
   IMPLICIT NONE
 !
   PRIVATE
 !
   PUBLIC four1, fourcol, fourrow
 !
 ! Global parameters
 !
   INTEGER, PARAMETER :: MXPLAN=8
 !
 ! Global variables
 !
   INTEGER ::n1d_saved=0
   INTEGER*8, DIMENSION(MXPLAN)             :: plan1d
   INTEGER,   DIMENSION(MXPLAN)             :: n1d
   REAL,      DIMENSION(:),     ALLOCATABLE :: scr1_real
   DOUBLE COMPLEX,   DIMENSION(:),     ALLOCATABLE :: scr1
 
   INTERFACE fourcol
      MODULE PROCEDURE four1, fourcol_ra, fourcol_raa
   END INTERFACE
 
 CONTAINS
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE four1(arr, isign)
 !
 !  A single 1D complex FFT
 !
     INCLUDE 'fftw_f77.h'
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: arr
     INTEGER,               INTENT(IN)    :: isign
 !
 ! Local variables
 !
     INTEGER :: n, id
 !
     n = SIZE(arr)
     IF( .NOT. ALLOCATED(scr1) ) THEN
        ALLOCATE(scr1(n))
     ELSE
        IF ( SIZE(scr1) < n ) THEN
           DEALLOCATE(scr1)
           ALLOCATE(scr1(n))
        END IF
     END IF
 !
     CALL getplan(n, isign, id, 1)
     CALL fftw_f77_one(plan1d(id), arr, scr1)
   END SUBROUTINE four1
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_ra(arr, isign)
 !
 !  1D complex FFT of columns of arr(1:N,1:howmany)
 !
      INCLUDE 'fftw_f77.h'
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
     INTEGER,                 INTENT(IN)    :: isign
 !
 ! Local variables
 !
     INTEGER :: n, howmany, id
 !
     n = SIZE(arr,1)
     howmany = SIZE(arr,2)
 !
     IF( .NOT. ALLOCATED(scr1) ) THEN
        ALLOCATE(scr1(n))
     ELSE
        IF ( SIZE(scr1) < n ) THEN
           DEALLOCATE(scr1)
           ALLOCATE(scr1(n))
        END IF
     END IF
 !
     CALL getplan(n, isign, id,1)
     CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n)
   END SUBROUTINE fourcol_ra
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_raa(arr, isign)
 !
 !  1D complex FFT of columns of arr(1:N,1:howmany)
 !
      INCLUDE 'fftw_f77.h'
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
     INTEGER,                   INTENT(IN)    :: isign
 !
 ! Local variables
 !
     INTEGER :: n, howmany, id
 !
     n = SIZE(arr,1)
     howmany = SIZE(arr,2)*SIZE(arr,3)
 !
     IF( .NOT. ALLOCATED(scr1) ) THEN
        ALLOCATE(scr1(n))
     ELSE
        IF ( SIZE(scr1) < n ) THEN
           DEALLOCATE(scr1)
           ALLOCATE(scr1(n))
        END IF
     END IF
 !
     CALL getplan(n, isign, id, 1)
     CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n)
   END SUBROUTINE fourcol_raa
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourrow(arr, isign)
 !
 !  1D complex FFT of rows of arr(1:howmany,1:N)
 !
     INCLUDE 'fftw_f77.h'
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
     INTEGER,                 INTENT(IN)    :: isign
 !
 ! Local variables
 !
     INTEGER :: n, howmany, id
 !
     n = SIZE(arr,2)
     howmany = SIZE(arr,1)
 !
     IF( .NOT. ALLOCATED(scr1) ) THEN
        ALLOCATE(scr1(n))
     ELSE
        IF ( SIZE(scr1) < n ) THEN
           DEALLOCATE(scr1)
           ALLOCATE(scr1(n))
        END IF
     END IF
 !
     CALL getplan(n, isign, id, 1)
     CALL fftw_f77(plan1d(id), howmany, arr, howmany, 1, &
          & scr1, howmany, 1)
   END SUBROUTINE fourrow
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE getplan(n, sign, id, complex_fftw)
 !
 !  Create or get an already created FFT plan (depends only on N.)
 !
     INCLUDE 'fftw_f77.h'
 !
 ! Dummy arguments
 !
     INTEGER, INTENT(IN)  :: n            ! size of transform
     INTEGER, INTENT(IN)  :: sign         ! dir. of transform -1=>FORWARD, +1=>BACKWARD
     INTEGER, INTENT(OUT) :: id           ! id of FFT plan
     INTEGER, INTENT(IN)  :: complex_fftw ! Create complex<->complex transform if =1
 !
 ! Local variables
 !
     INTEGER :: k, i, dir
 !
     k = sign*(2*n+complex_fftw)
     DO i = 1,n1d_saved
        IF( k == n1d(i)) THEN
           id = i
           RETURN
        END IF
     END DO
     IF( n1d_saved == MXPLAN) THEN
        PRINT*, 'Module fft: MXPLAN too small! Increase it and recompile'
        STOP
     END IF
     n1d_saved = n1d_saved+1
     n1d(n1d_saved) = k
     id = n1d_saved
     dir = FFTW_FORWARD
     IF( sign == +1 ) dir = FFTW_BACKWARD
     IF (complex_fftw == 1) THEN
        CALL fftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE)
 !!$    ELSE
 !!$       CALL rfftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE)
     END IF
   END SUBROUTINE getplan
 END MODULE fft
 #endif
 !
 #if defined(fft_w3)
 MODULE fft
 !
   IMPLICIT NONE
 !
   PRIVATE
   PUBLIC :: four1, fourcol, fourrow
 !
   INCLUDE 'fftw3.f'
 !
   TYPE int_para
      INTEGER, DIMENSION(2) :: par ! size of transform
   END TYPE int_para
 !
 ! Global parameters
 !
   INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans.
 !
 ! Global variables
 !
   INTEGER*8,      DIMENSION(MXPLAN,4), SAVE :: plan1d      ! plans for 1-dim FFT
   TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par
   INTEGER,        DIMENSION(4),        SAVE :: n1d_saved=0 ! number of plans saved
 !
   INTERFACE fourcol
      MODULE PROCEDURE four1, fourcol_ra, fourcol_raa
   END INTERFACE
 !
 CONTAINS
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE four1(vec, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
     INTEGER,               INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=1
 !
 ! Local variables
 !
     INTEGER :: k
     INTEGER :: dim1, i, id, istat, n
     DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: vec_tmp
 !
 !
     n = SIZE(vec)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*n
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
 ! test if the maximal number of plans is alredy reached.
 !
        IF (n1d_saved(NUM) == MXPLAN) THEN
            WRITE(*,*) 'FOUR1: MXPLAN too small! Increase it and recompile'
           STOP
        END IF
 !
        dim1 = SIZE(vec)
        ALLOCATE(vec_tmp(dim1), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: Allocation of  vec_tmp  failed!'
           STOP
        END IF
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        id = n1d_saved(NUM)
 !
        SELECT CASE (isign)
        CASE (-1)
           CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), &
                FFTW_FORWARD, FFTW_ESTIMATE)
        CASE (1)
           CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), &
                FFTW_BACKWARD, FFTW_ESTIMATE)
        END SELECT
 !
        DEALLOCATE(vec_tmp, stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: Dellocation of  vec_tmp  failed!'
           STOP
        END IF
 !
     END SELECT
 !
     CALL dfftw_execute_dft(plan1d(id,NUM), vec(1), vec(1))
 !
   END SUBROUTINE four1
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_ra(arr, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
     INTEGER,                 INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=2, RANK=1
 !
 ! Local variables
 !
     INTEGER :: k
     INTEGER :: dim1, dim2, howmany, i, id, istat, n
     INTEGER, DIMENSION(RANK)             :: n_arr, nembed
     DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp
 !
     dim1 = SIZE(arr,1)
     dim2 = SIZE(arr,2)
     howmany = SIZE(arr,2)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*dim1
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
 ! test if the maximal number of plans is alredy reached.
 !
        IF (n1d_saved(NUM) == MXPLAN) THEN
           WRITE(*,*) 'FOURCOL_RA: MXPLAN too small! Increase it and recompile'
           STOP
        END IF
 !
        ALLOCATE(arr_tmp(dim1, dim2), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RA: Allocation of  arr_tmp  failed!'
           STOP
        END IF
 !
        nembed(1) = SIZE(arr)
        n_arr(1)  = dim1
        n         = n_arr(1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
        SELECT CASE (isign)
        CASE (-1)
           CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
                arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, &
                FFTW_FORWARD, FFTW_ESTIMATE)
        CASE (1)
           CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
                arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, &
                FFTW_BACKWARD, FFTW_ESTIMATE)
        END SELECT
 !
        DEALLOCATE(arr_tmp, stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RA: Dellocation of  arr_tmp  failed!'
           STOP
        END IF
 !
     END SELECT
 !
     CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1))
 !
   END SUBROUTINE fourcol_ra
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_raa(arr, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
     INTEGER,                   INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=3, RANK=1
 !
 ! Local variables
 !
     INTEGER :: k
     INTEGER :: dim1, dim2, dim3, howmany, i, id, istat, n
     INTEGER, DIMENSION(RANK)               :: n_arr, nembed
     DOUBLE COMPLEX, DIMENSION(:,:,:), ALLOCATABLE :: arr_tmp
 !
     dim1 = SIZE(arr,1)
     dim2 = SIZE(arr,2)
     dim3 = SIZE(arr,3)
     howmany = dim2*dim3
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*dim1
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
 ! test if the maximal number of plans is alredy reached.
 !
        IF (n1d_saved(NUM) == MXPLAN) THEN
           WRITE(*,*) 'FOURCOL_RAA: MXPLAN too small! Increase it and recompile'
           STOP
        END IF
 !
        ALLOCATE(arr_tmp(dim1, dim2, dim3), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RAA: Allocation of  arr_tmp  failed!'
           STOP
        END IF
 !
        nembed(1) = SIZE(arr)
        n_arr(1)  = dim1
        n         = n_arr(1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
        SELECT CASE (isign)
        CASE (-1)
           CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
                arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, &
                FFTW_FORWARD, FFTW_ESTIMATE)
        CASE (1)
           CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
                arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, &
                FFTW_BACKWARD, FFTW_ESTIMATE)
        END SELECT
 !
        DEALLOCATE(arr_tmp, stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RAA: Dellocation of  arr_tmp  failed!'
           STOP
        END IF
 !
     END SELECT
 !
     CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1,1), arr(1,1,1))
 !
   END SUBROUTINE fourcol_raa
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourrow(arr, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
     INTEGER,                 INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=4, RANK=1
 !
 ! Local variables
 !
     INTEGER :: k
     INTEGER :: dim1, dim2, howmany, i, id, istat, n
     INTEGER, DIMENSION(RANK)             :: n_arr, nembed
     DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp
 !
     dim1 = SIZE(arr,1)
     dim2 = SIZE(arr,2)
     howmany = dim1
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*dim2
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
 ! test if the maximal number of plans is alredy reached.
 !
        IF (n1d_saved(NUM) == MXPLAN) THEN
           WRITE(*,*) 'FOURROW: MXPLAN too small! Increase it and recompile'
           STOP
        END IF
 !
        ALLOCATE(arr_tmp(dim1, dim2), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURROW: Allocation of  arr_tmp  failed!'
           STOP
        END IF
 !
        nembed(1) = SIZE(arr)
        n_arr(1)  = SIZE(arr,2)
        n         = n_arr(1)
        howmany   = SIZE(arr,1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
        SELECT CASE (isign)
        CASE (-1)
           CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
                arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, &
                FFTW_FORWARD, FFTW_ESTIMATE)
        CASE (1)
           CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
                arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, &
                FFTW_BACKWARD, FFTW_ESTIMATE)
        END SELECT
 !
        DEALLOCATE(arr_tmp, stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: Dellocation of  arr_tmp  failed!'
           STOP
        END IF
 !
     END SELECT
 !
     CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1))
 !
   END SUBROUTINE fourrow
 END MODULE fft
 #endif
 !
 #if defined(fft_essl)
 MODULE fft
 !
   IMPLICIT NONE
 !
   PRIVATE
 !
   PUBLIC :: four1, fourcol, fourrow
 !
   TYPE pointer_ra
      REAL, DIMENSION(:), POINTER :: poi_ra
   END TYPE pointer_ra
 !
   TYPE int_para
      INTEGER, DIMENSION(2) :: par
   END TYPE int_para
 !
 ! Global parameters
 !
   INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of work arrays.
 !
 ! Global variables
 !
   EXTERNAL :: ENOTRM
   LOGICAL,                                            SAVE :: initflag=.TRUE.    ! initialization of the module
   CHARACTER (len=8),                                  SAVE :: S2015              ! string to copy error list entry
   REAL,             DIMENSION(8)                           :: aux1               ! auxilary array
   REAL,             DIMENSION(1)                           :: aux2               ! auxilary array
   TYPE(pointer_ra), DIMENSION(:,:),      ALLOCATABLE, SAVE :: aux1_poi, aux2_poi ! work arrays for the ESSL routine
   TYPE(int_para),   DIMENSION(MXPLAN,4),              SAVE :: n1d_par            ! size of transform
   INTEGER,          DIMENSION(4),                     SAVE :: n1d_saved=0        ! number of plans saved
 !
   INTERFACE fourcol
      MODULE PROCEDURE fourcol_ra, fourcol_raa
   END INTERFACE
 !
 CONTAINS
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE four1(vec, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
     INTEGER,               INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=1
 !
 ! Local variables
 !
     INTEGER :: i, id, istat, k, n, naux1, naux2
 !
     IF (initflag) THEN
        initflag = .FALSE.
 !
        CALL EINFO(0)
        CALL ERRSAV(2015,S2015)
 !
        ALLOCATE(aux1_poi(MXPLAN,4), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: 1. Allocation of  aux1_poi  failed!'
           STOP
        END IF
 !
        ALLOCATE(aux2_poi(MXPLAN,4), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: 1. Allocation of  aux2_poi  failed!'
           STOP
        END IF
 !
     END IF
 !
     n = SIZE(vec)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*n
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        id = n1d_saved(NUM)
 !
        CALL ERRSET(2015,0,-1,1,ENOTRM,0)
 !
        naux1 = SIZE(aux1)
        naux2 = SIZE(aux2)
 !
        CALL dcft(1, vec(1), 1, n,  vec(1), 1, n, &
             n, 1, -isign, 1.0, aux1, naux1, aux2, naux2)
 !
        CALL ERRSTR(2015,S2015)
 !
 ! dynamic allocation of the work arrays.
 !
        ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: 2. Allocation of  aux1_poi  failed!'
           STOP
        ENDIF
 !
        ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOUR1: 2. Allocation of  aux2_poi  failed!'
           STOP
        ENDIF
 !
        CALL dcft(1, vec(1), 1, n, vec(1), 1, n, &
             n, 1, -isign, 1.0, &
             aux1_poi(id,NUM)%poi_ra(1), naux1, &
             aux2_poi(id,NUM)%poi_ra(1), naux2)
 !
     END SELECT
 !
     CALL dcft(0, vec(1), 1, n, vec(1), 1, n, &
          n, 1, -isign, 1.0,  &
          aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
          aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
 !
   END SUBROUTINE four1
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_ra(arr, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
     INTEGER,                 INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=2
 !
 ! Local variables
 !
     INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2
 !
     IF (initflag) THEN
        initflag = .FALSE.
 !
        CALL EINFO(0)
        CALL ERRSAV(2015,S2015)
 !
        ALLOCATE(aux1_poi(MXPLAN,4), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RA: 1. Allocation of  aux1_poi  failed!'
           STOP
        END IF
 !
        ALLOCATE(aux2_poi(MXPLAN,4), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RA: 1. Allocation of  aux2_poi  failed!'
           STOP
        END IF
 !
     END IF
 !
     dim1    = SIZE(arr,1)
     howmany = SIZE(arr,2)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*dim1
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
        CALL ERRSET(2015,0,-1,1,ENOTRM,0)
 !
        naux1 = SIZE(aux1)
        naux2 = SIZE(aux2)
 !
        CALL dcft(1, arr(1,1), 1, dim1,  arr(1,1), 1, dim1, &
             dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2)
 !
        CALL ERRSTR(2015,S2015)
 !
 ! dynamic allocation of the work arrays.
 !
        ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RA: 2. Allocation of  aux1_poi  failed!'
           STOP
        ENDIF
 !
        ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RA: 2. Allocation of  aux2_poi  failed!'
           STOP
        ENDIF
 !
        CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
             dim1, howmany, -isign, 1.0, &
             aux1_poi(id,NUM)%poi_ra(1), naux1, &
             aux2_poi(id,NUM)%poi_ra(1), naux2)
 !
     END SELECT
 !
     CALL dcft(0, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
          dim1, howmany, -isign, 1.0, &
          aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
          aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
 !
   END SUBROUTINE fourcol_ra
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_raa(arr, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
     INTEGER,                   INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=3
 !
 ! Local variables
 !
     INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2
 !
     IF (initflag) THEN
        initflag = .FALSE.
 !
        CALL EINFO(0)
        CALL ERRSAV(2015,S2015)
 !
        ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of  aux1_poi  failed!'
           STOP
        END IF
 !
        ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of  aux2_poi  failed!'
           STOP
        END IF
 !
     END IF
 !
     dim1    = SIZE(arr,1)
     howmany = SIZE(arr,2)*SIZE(arr,3)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*dim1
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
        CALL ERRSET(2015,0,-1,1,ENOTRM,0)
 !
        naux1 = SIZE(aux1)
        naux2 = SIZE(aux2)
 !
        CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
             dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2)
 !
        CALL ERRSTR(2015,S2015)
 !
 ! dynamic allocation of the work arrays.
 !
        ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of  aux1_poi  failed!'
           STOP
        ENDIF
 !
        ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of  aux2_poi  failed!'
           STOP
        ENDIF
 !
        CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
             dim1, howmany, -isign, 1.0, &
             aux1_poi(id,NUM)%poi_ra(1), naux1, &
             aux2_poi(id,NUM)%poi_ra(1), naux2)
 !
     END SELECT
 !
     CALL dcft(0, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
          dim1, howmany, -isign, 1.0, &
             aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
             aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
 !
   END SUBROUTINE fourcol_raa
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourrow(arr, isign)
 !
 ! Dummy arguments
 !
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
     INTEGER,                 INTENT(IN)    :: isign
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=4
 !
 ! Local variables
 !
     INTEGER :: dim1, dim2, i, id, istat, k, naux1, naux2
 !
     IF (initflag) THEN
        initflag = .FALSE.
 !
        CALL EINFO(0)
        CALL ERRSAV(2015,S2015)
 !
        ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURROW: 1. Allocation of  aux1_poi  failed!'
           STOP
        END IF
 !
        ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURROW: 1. Allocation of  aux2_poi  failed!'
           STOP
        END IF
 !
     END IF
 !
     dim1 = SIZE(arr,1)
     dim2 = SIZE(arr,2)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*dim1
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             dim2 == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
 !
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = dim2
        id = n1d_saved(NUM)
 !
        CALL ERRSET(2015,0,-1,1,ENOTRM,0)
 !
        naux1 = SIZE(aux1)
        naux2 = SIZE(aux2)
 !
        CALL dcft(1, arr(1,1), dim2, 1,  arr(1,1), dim2, 1, &
             dim1, dim2, -isign, 1.0, aux1, naux1, aux2, naux2)
 !
        CALL ERRSTR(2015,S2015)
 !
 ! dynamic allocation of the work arrays.
 !
        ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURROW: 2. Allocation of  aux1_poi  failed!'
           STOP
        ENDIF
 !
        ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
        IF (istat /= 0) THEN
           WRITE(*,*) 'FOURROW: 2. Allocation of  aux2_poi  failed!'
           STOP
        ENDIF
 !
        CALL dcft(1, arr(1,1), dim1, 1, arr(1,1), dim1, 1, &
             dim1, dim2, -isign, 1.0, &
             aux1_poi(id,NUM)%poi_ra(1), naux1, &
             aux2_poi(id,NUM)%poi_ra(1), naux2)
 !
     END SELECT
 !
     CALL dcft(0, arr(1,1), dim1, 1, arr(1,1), dim1, 1, &
          dim1, dim2, -isign, 1.0,  &
          aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
          aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
 !
   END SUBROUTINE fourrow
 END MODULE fft
 #endif
 !
 #if defined(fft_mkl)
 MODULE fft
 !
   USE mkl_dfti
 !
   IMPLICIT NONE
 !
   PRIVATE
 !
   PUBLIC :: pointer_r, handle1d
   PUBLIC :: four1, fourcol, fourrow
 !
   TYPE pointer_r
      TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle
   END TYPE pointer_r
 !
   TYPE int_para
      INTEGER, DIMENSION(2) :: par
   END TYPE int_para
 !
 ! Global parameters
 !
   INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans.
 !
 ! Global variables
 !
   TYPE(DFTI_DESCRIPTOR),                      POINTER       :: desc_handle
   TYPE(pointer_r),       DIMENSION(MXPLAN,4),          SAVE :: handle1d    ! descriptor handles for 1-dim FFT
   TYPE(int_para),        DIMENSION(MXPLAN,4),          SAVE :: n1d_par     ! size of transform
   INTEGER,               DIMENSION(4),                 SAVE :: n1d_saved=0 ! number of descriptor handles saved
 !
   INTERFACE fourcol
      MODULE PROCEDURE fourcol_ra, fourcol_raa
   END INTERFACE
 !
 CONTAINS
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE four1(vec, isign)
 !
 ! Dummy arguments
 !
     INTEGER,               INTENT(IN)    :: isign
     DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=1
 !
 ! Local variables
 !
     INTEGER :: dim1, id, i, k, status
     LOGICAL :: init_flag
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*SIZE(vec)
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
        init_flag = .TRUE.
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        id = n1d_saved(NUM)
 !
     CASE default
        init_flag = .FALSE.
     END SELECT
 !
     dim1 = SIZE(vec,1)
 !
     CALL fourcol_mkl(vec(1), dim1, 1, isign, init_flag, id, NUM)
 !
   END SUBROUTINE four1
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_ra(arr, isign)
 !
 ! Dummy arguments
 !
     INTEGER,                 INTENT(IN)    :: isign
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=2
 !
 ! Local variables
 !
     INTEGER :: dim1, howmany, id, i, k, status
     LOGICAL :: init_flag
 !
     dim1 = SIZE(arr,1)
     howmany = SIZE(arr,2)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*SIZE(arr)
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
        init_flag = .TRUE.
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
     CASE default
        init_flag = .FALSE.
     END SELECT
 !
     CALL fourcol_mkl(arr(1,1), dim1, howmany, isign, init_flag, id, NUM)
 !
   END SUBROUTINE fourcol_ra
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourcol_raa(arr, isign)
 !
 ! Dummy arguments
 !
     INTEGER,                   INTENT(IN)    :: isign
     DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=3
 !
 ! Local variables
 !
     INTEGER :: dim1, howmany, id, i, k, status
     LOGICAL :: init_flag
 !
     dim1 = SIZE(arr,1)
     howmany = SIZE(arr,2)*SIZE(arr,3)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*SIZE(arr)
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
        init_flag = .TRUE.
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
     CASE default
        init_flag = .FALSE.
     END SELECT
 !
     CALL fourcol_mkl(arr(1,1,1), dim1, howmany, isign, init_flag, id, NUM)
 !
   END SUBROUTINE fourcol_raa
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
   SUBROUTINE fourrow(arr, isign)
 !
 ! Dummy arguments
 !
     INTEGER,                 INTENT(IN)    :: isign
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
 !
 ! Local parameters
 !
     INTEGER, PARAMETER :: NUM=4
 !
 ! Local variables
 !
     INTEGER :: dim2, howmany, id, i, k, status
     LOGICAL :: init_flag
 !
     howmany = SIZE(arr,1)
     dim2    = SIZE(arr,2)
 !
 ! test if a plan that fits is already created.
 !
     id = -1
     k = isign*SIZE(arr)
     DO i = 1,n1d_saved(NUM)
        IF (k == n1d_par(i,NUM)%par(1) .AND. &
             howmany == n1d_par(i,NUM)%par(2)) THEN
           id = i
           EXIT
        END IF
     END DO
 !
     SELECT CASE (id)
     CASE (-1)
        init_flag = .TRUE.
        n1d_saved(NUM) = n1d_saved(NUM)+1
        n1d_par(n1d_saved(NUM),NUM)%par(1) = k
        n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
        id = n1d_saved(NUM)
 !
     CASE default
        init_flag = .FALSE.
     END SELECT
 !
     CALL fourrow_mkl(arr(1,1), howmany, dim2, isign, init_flag, id, NUM)
 !
   END SUBROUTINE fourrow
 END MODULE fft
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
 SUBROUTINE fourcol_mkl(arr, dim1, howmany, isign, init_flag, id, num)
 !
 ! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu
 !          compiler from making a copy of array arr when passing
 !          arguments.
   USE fft, ONLY: handle1d
 !
   USE mkl_dfti
 !
   IMPLICIT NONE
 !
 ! Dummy arguments
 !
   DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr
   INTEGER,               INTENT(IN)    :: dim1
   INTEGER,               INTENT(IN)    :: howmany
   INTEGER,               INTENT(IN)    :: isign
   LOGICAL,               INTENT(IN)    :: init_flag
   INTEGER,               INTENT(IN)    :: id
   INTEGER,               INTENT(IN)    :: num
 !
 ! Local variables
 !
   INTEGER :: i, status
 !
   IF (init_flag) THEN
 !
      status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, &
           DFTI_DOUBLE, DFTI_COMPLEX, 1, dim1)
      IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 0:', DftiErrorMessage(status)
 
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_NUMBER_OF_TRANSFORMS, howmany)
      IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 1:', DftiErrorMessage(status)
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_INPUT_DISTANCE, dim1)
      IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 2:', DftiErrorMessage(status)
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_OUTPUT_DISTANCE, dim1)
      IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 3:', DftiErrorMessage(status)
 !
      status = DftiCommitDescriptor(handle1d(id,num)%desc_handle)
      IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 4:', DftiErrorMessage(status)
 !
   END IF
 !
   SELECT CASE (isign)
   CASE (-1)
      status = DftiComputeForward(handle1d(id,num)%desc_handle, arr)
   CASE (1)
      status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr)
   END SELECT
 !
 END SUBROUTINE fourcol_mkl
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !
 SUBROUTINE fourrow_mkl(arr, howmany, dim2, isign, init_flag, id, num)
 !
 ! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu
 !          compiler from making a copy of array arr when passing
 !          arguments.
 !
   USE fft, ONLY: handle1d
 !
   USE mkl_dfti
 !
   IMPLICIT NONE
 !
 ! Dummy arguments
 !
   DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr
   INTEGER,               INTENT(IN)    :: howmany
   INTEGER,               INTENT(IN)    :: dim2
   INTEGER,               INTENT(IN)    :: isign
   LOGICAL,               INTENT(IN)    :: init_flag
   INTEGER,               INTENT(IN)    :: id
   INTEGER,               INTENT(IN)    :: num
 !
 ! Local variables
 !
   INTEGER :: i, status
   INTEGER, DIMENSION(2) :: stride
 !
   IF (init_flag) THEN
 !
      stride(1) = 0
      stride(2) = howmany
 !
      status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, &
           DFTI_DOUBLE, DFTI_COMPLEX, 1, dim2)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 0:', DftiErrorMessage(status)
 
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_NUMBER_OF_TRANSFORMS, dim2)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 1:', DftiErrorMessage(status)
 !
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_INPUT_DISTANCE, 1)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 2:', DftiErrorMessage(status)
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_INPUT_STRIDES, stride)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 3:', DftiErrorMessage(status)
 !
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_OUTPUT_DISTANCE, 1)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 4:', DftiErrorMessage(status)
      status = DftiSetValue(handle1d(id,num)%desc_handle, &
           DFTI_OUTPUT_STRIDES, stride)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 5:', DftiErrorMessage(status)
 !
      status = DftiCommitDescriptor(handle1d(id,num)%desc_handle)
      IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 6:', DftiErrorMessage(status)
 !
   END IF
 !
   SELECT CASE (isign)
   CASE (-1)
      status = DftiComputeForward(handle1d(id,num)%desc_handle, arr)
   CASE (1)
      status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr)
   END SELECT
 !
 END SUBROUTINE fourrow_mkl
 #endif
 !
diff --git a/fft/fftw_f77.h b/fft/fftw_f77.h
index b825f23..a566a02 100644
--- a/fft/fftw_f77.h
+++ b/fft/fftw_f77.h
@@ -1,53 +1,53 @@
 !>
 !> @file fftw_f77.h
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !     This file contains PARAMETER statements for various constants
 !     that can be passed to FFTW routines.  You should include
 !     this file in any FORTRAN program that calls the fftw_f77
 !     routines (either directly or with an #include statement
 !     if you use the C preprocessor).
 
       integer FFTW_FORWARD,FFTW_BACKWARD
       parameter (FFTW_FORWARD=-1,FFTW_BACKWARD=1)
 
       integer FFTW_REAL_TO_COMPLEX,FFTW_COMPLEX_TO_REAL
       parameter (FFTW_REAL_TO_COMPLEX=-1,FFTW_COMPLEX_TO_REAL=1)
 
       integer FFTW_ESTIMATE,FFTW_MEASURE
       parameter (FFTW_ESTIMATE=0,FFTW_MEASURE=1)
 
       integer FFTW_OUT_OF_PLACE,FFTW_IN_PLACE,FFTW_USE_WISDOM
       parameter (FFTW_OUT_OF_PLACE=0)
       parameter (FFTW_IN_PLACE=8,FFTW_USE_WISDOM=16)
 
       integer FFTW_THREADSAFE
       parameter (FFTW_THREADSAFE=128)
 
 !     Constants for the MPI wrappers:
       integer FFTW_TRANSPOSED_ORDER, FFTW_NORMAL_ORDER
       integer FFTW_SCRAMBLED_INPUT, FFTW_SCRAMBLED_OUTPUT
       parameter(FFTW_TRANSPOSED_ORDER=1, FFTW_NORMAL_ORDER=0)
       parameter(FFTW_SCRAMBLED_INPUT=8192)
       parameter(FFTW_SCRAMBLED_OUTPUT=16384)
diff --git a/fft/tfft.f90 b/fft/tfft.f90
index 866a66b..18da423 100644
--- a/fft/tfft.f90
+++ b/fft/tfft.f90
@@ -1,141 +1,141 @@
 !>
 !> @file tfft.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !  Test the FFT routines exported by module fft
 !
   USE fft
   IMPLICIT NONE
   INTEGER :: nx=8
   DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: a,b,c
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rn1, rn2
   DOUBLE PRECISION :: pi, argx
   INTEGER :: ix, nx0=2
 !
   PRINT*, 'Enter array dim. nx'
   READ(*,*) nx
   ALLOCATE(a(nx,4), b(nx,4), c(4,nx))
   ALLOCATE(rn1(nx,4), rn2(nx,4))
 !
 !  Create initial array
   pi = 4.0d0*ATAN(1.0d0)
   WRITE(*,*) 'Enter mode nx0'
   READ(*,*) nx0
   DO ix=0,nx-1
      argx = 2.0d0*pi/nx*nx0*ix
      a(ix+1,:) = COS(argx)
   END DO
 !
   WRITE(*,*) '-----------------------'
   WRITE(*,*) 'Reals of original array'
   WRITE(*,*) '-----------------------'
   WRITE(*,'(10f10.4)') REAL(a)
 !________________________________________________________________________________
 !
   WRITE(*,*) '-----------------'
   WRITE(*,*) 'testing four1 ...'
   WRITE(*,*) '-----------------'
 !
 !  Forward transform
   b=a
   CALL four1(b(:,1), -1)
 !
 !  Check
   WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
   WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
 !
 !  Backward transform
   CALL four1(b(:,1), 1)
   b = b/REAL(nx)
 !
 !  Check
   WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
   WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
 !
   b(:,1) = b(:,1)-a(:,1)
   rn1(:,1) = REAL(b(:,1),8)
   rn2(:,1) = AIMAG(b(:,1))
   PRINT *, 'Min. max err of real', MINVAL(rn1(:,1)), MAXVAL(rn1(:,1))
   PRINT *, 'Min. max err of imag', MINVAL(rn2(:,1)), MAXVAL(rn2(:,1))
 !________________________________________________________________________________
 !
   WRITE(*,*) '-------------------'
   WRITE(*,*) 'testing fourcol ...'
   WRITE(*,*) '-------------------'
 !
 !  Forward transform
   b=a
   CALL fourcol(b(:,:), -1)
 !
 !  Check
   WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
   WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
 !
 !  Backward transform
   CALL fourcol(b(:,:), 1)
   b = b/REAL(nx)
 !
 !  Check
   WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
   WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
 !
   b = b-a
   rn1 = REAL(b,8)
   rn2 = AIMAG(b)
   PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1)
   PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2)
 !________________________________________________________________________________
 !
   WRITE(*,*) '-------------------'
   WRITE(*,*) 'testing fourrow ...'
   WRITE(*,*) '-------------------'
 !
 !  Forward transform
   b=a
   c = TRANSPOSE(b)
   CALL fourrow(c, -1)
 !
 !  Check
   WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:))
   WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:))
 !
 !  Backward transform
   CALL fourrow(c, 1)
   c = c/REAL(nx)
 !
 !  Check
   WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:))
   WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:))
 !
   b = TRANSPOSE(c)-a
   rn1 = REAL(b,8)
   rn2 = AIMAG(b)
   PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1)
   PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2)
 !
 !  Clean up
   DEALLOCATE(a,b,c, rn1,rn2)
 END PROGRAM main
diff --git a/matlab/cds_mat.m b/matlab/cds_mat.m
index e2a79d4..25e1c65 100644
--- a/matlab/cds_mat.m
+++ b/matlab/cds_mat.m
@@ -1,44 +1,44 @@
 %
 % @file cds_mat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [mata, diag] = cds_mat(file, dset)
     n=double(hdf5read(file,dset, 'RANK'));
     dists=double(hdf5read(file, strcat(dset,'/dists'))); 
     val=hdf5read(file, strcat(dset,'/vals'));
     
     %%    Shift the off-diagonals  %%
     for k=1:length(dists)
         d=dists(k);
         if d < 0
             val(1:n+d,k) = val(1-d:n,k);
         elseif d > 0
             val(n:-1:d+1,k) = val(n-d:-1:1,k);
         end
     end
     mata = spdiags(val, dists, n,n);
     if nargout == 2
         idiag = find(dists==0);
         diag = val(:,idiag);
     end
diff --git a/matlab/csr_mat.m b/matlab/csr_mat.m
index ddfa679..7dd9ac5 100644
--- a/matlab/csr_mat.m
+++ b/matlab/csr_mat.m
@@ -1,45 +1,45 @@
 %
 % @file csr_mat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [mata, diag] = csr_mat(file, dset)
     n=hdf5read(file,dset, 'RANK');
     nnz=hdf5read(file,dset, 'NNZ');
     cols=hdf5read(file, strcat(dset,'/cols')); 
     irow=hdf5read(file, strcat(dset,'/irow')); 
     val=hdf5read(file, strcat(dset,'/val')); 
     idiag=hdf5read(file, strcat(dset,'/idiag')); 
     for i=1:n
         s = irow(i);
         e = irow(i+1)-1;
         rows(s:e) = i;    
     end
     cols=double(cols);
     rows=double(rows);
     vals = double(val);
     mata = sparse(rows,cols,vals);
     if nargout == 2
         diag = val(idiag);
     end
     
diff --git a/matlab/driv1.m b/matlab/driv1.m
index 2d28ea7..cafb273 100644
--- a/matlab/driv1.m
+++ b/matlab/driv1.m
@@ -1,68 +1,68 @@
 %
 % @file driv1.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='driv1.h5';
 
 x = hdf5read(file, '/X');
 knotsx = hdf5read(file, 'KNOTSX');
 splinesx = hdf5read(file, '/splinesx','V71Dimensions', true);
 y = hdf5read(file, '/Y');
 knotsy = hdf5read(file, 'KNOTSY');
 splinesy = hdf5read(file, '/splinesy','V71Dimensions', true);
 
 c=['b','g','r','c','m','y','k'];
 nc=size(c,2);
 
 figure
 %subplot(211)
 hold on
 ns = size(splinesx,1);
 attr=hdf5read(file,'/splinesx/title'); title_ann=attr.Data;
 for i = 1:ns
     cc = mod(i-1,nc)+1;
     plot(x,splinesx(i,:),c(cc))
 end
 yk=zeros(size(knotsx));
 plot(knotsx,yk,'ro');
 grid on
 title(title_ann)
 ylabel('Splines')
 
 figure
 %subplot(212)
 hold on
 ns = size(splinesy,1);
 attr=hdf5read(file,'/splinesy/title'); title_ann=attr.Data;
 for i = 1:ns
     cc = mod(i-1,nc)+1;
     plot(y,splinesy(i,:),c(cc))
 end
 yk=zeros(size(knotsy));
 i1=find(knotsy==y(1));
 i2=find(knotsy>=y(size(y,1)),1);
 plot(knotsy(i1:i2),yk(i1:i2),'ro');
 grid on
 title(title_ann)
 ylabel('Splines')
diff --git a/matlab/fit.m b/matlab/fit.m
index 64b90e8..7f6674a 100644
--- a/matlab/fit.m
+++ b/matlab/fit.m
@@ -1,38 +1,38 @@
 %
 % @file fit.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 P1=polyfit(log(n),log(err1),1)
 P2=polyfit(log(n),log(err2),1)
 P3=polyfit(log(n),log(err3),1)
 
 figure
 loglog(n,err1,'o', n, exp(P1(2)).*n.^P1(1), 'b')
 hold on
 loglog(n,err2,'rh', n, exp(P2(2)).*n.^P2(1),'r')
 loglog(n,err3,'*k', n, exp(P3(2)).*n.^P3(1), 'k')
 grid on
 xlabel('Number of intervals N');
 ylabel('Discretization Error')
 title('2D Cylindrical problem with m=1, s=10')
diff --git a/matlab/fit1d.m b/matlab/fit1d.m
index 6475e77..f96663d 100644
--- a/matlab/fit1d.m
+++ b/matlab/fit1d.m
@@ -1,94 +1,94 @@
 %
 % @file fit1d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='fit1d.h5';
 
 x = hdf5read(file, '/X');
 f =  hdf5read(file, '/FCALC');
 fexact =  hdf5read(file, '/FEXACT');
 error =  hdf5read(file, '/ERROR');
 
 f1 =  hdf5read(file, '/FCALC1');
 fexact1 =  hdf5read(file, '/FEXACT1');
 error1 =  hdf5read(file, '/ERROR1');
 
 splines = hdf5read(file, '/SPLINES'); splines = splines';
 %
 %     Attributes
 %
 nidbas = hdf5read(file,'/NIDBAS');
 nx = hdf5read(file,'/NX');
 attr=hdf5read(file,'/FEXACT/title'); fexact_ann=attr.Data;
 attr=hdf5read(file,'/FCALC/title'); f_ann=attr.Data;
 attr=hdf5read(file,'/ERROR/title'); error_ann=attr.Data;
 
 attr=hdf5read(file,'/FEXACT1/title'); fexact1_ann=attr.Data;
 attr=hdf5read(file,'/FCALC1/title'); f1_ann=attr.Data;
 attr=hdf5read(file,'/ERROR1/title'); error1_ann=attr.Data;
 
 label=sprintf('Splines of degree %d, NX =%d', nidbas, nx);
 
 ns = size(splines,1);
 
 
 c=['b','g','r','c','m','y','k'];
 nc=size(c,2);
 
 figure
 subplot(511)
 hold on
 for i = 1:ns
     cc = mod(i-1,nc)+1;
     plot(x,splines(i,:),c(cc))
 end
 grid on
 ylabel('Splines')
 xlabel('X')
 title(label);
 hold off
 
 subplot(512)
 plot(x, f, 'o', x, fexact)
 legend(f_ann, fexact_ann)
 xlabel('X')
 grid on
 
 subplot(513)
 plot(x, error)
 ylabel(error_ann)
 xlabel('X')
 grid on
 
 subplot(514)
 plot(x, f1, 'h', x, fexact1)
 legend(f1_ann, fexact1_ann)
 xlabel('X')
 grid on
 
 subplot(515)
 plot(x, error1)
 ylabel(error1_ann)
 xlabel('X')
 grid on
diff --git a/matlab/fit2d.m b/matlab/fit2d.m
index f225459..3d69116 100644
--- a/matlab/fit2d.m
+++ b/matlab/fit2d.m
@@ -1,80 +1,80 @@
 %
 % @file fit2d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='fit2d.h5';
 %
 %     Get data from data sets
 %
 r=hdf5read(file,'/xpt');
 t=hdf5read(file,'/ypt');
 
 fcalc=hdf5read(file,'/fcalc');
 fexact=hdf5read(file,'/fexact');
 errs=hdf5read(file,'/errs');
 %
 %     Attributes
 %
 NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
 NIDBAS1=hdf5read(file,'/NIDBAS1');
 NIDBAS2=hdf5read(file,'/NIDBAS2');
 MBESS=hdf5read(file,'/MBESS');
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
               NIDBAS1, NIDBAS2, MBESS);
 
 attr=hdf5read(file,'/xpt/title'); x_ann=attr.Data;
 attr=hdf5read(file,'/ypt/title'); y_ann=attr.Data;
 attr=hdf5read(file,'/fcalc/title'); fcalc_ann=attr.Data;
 attr=hdf5read(file,'/fexact/title'); fexact_ann=attr.Data;
 attr=hdf5read(file,'/errs/title');errs_ann=attr.Data;
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 
 figure
 subplot(221)
 pcolor(double(x),double(y),double(fcalc));
 shading interp
 xlabel('X'); ylabel('Y')
 title(LABEL)
 colorbar
 
 subplot(222)
 pcolor(double(x),double(y),double(fexact))
 shading interp
 axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(double(x),double(y),double(errs))
 xlabel('X'); ylabel('Y');
 title(errs_ann)
 
 %% Plot error at theta ~ pi/4
 k = max(find(t<pi/4));
 err=fcalc(k,:)-fexact(k,:);
 figure
 plot(r, err)
diff --git a/matlab/fitlog.m b/matlab/fitlog.m
index 29fe33f..e1410d6 100644
--- a/matlab/fitlog.m
+++ b/matlab/fitlog.m
@@ -1,28 +1,28 @@
 %
 % @file fitlog.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [yfit, P] = fitlog(x,y)
 P=polyfit(log(x),log(y),1);
 yfit = exp(P(2)).*x.^P(1);
diff --git a/matlab/fourier_gs.m b/matlab/fourier_gs.m
index 09bbb05..d8e94bb 100644
--- a/matlab/fourier_gs.m
+++ b/matlab/fourier_gs.m
@@ -1,69 +1,69 @@
 %
 % @file fourier_gs.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 clear all
 tau=0;
 alpha=0.5;
 
 theta=-pi:0.02:pi;
 [x,y] = meshgrid(theta,theta);
 n1=length(theta);
 n2=n1;
 
 %%%%
 %%%% Gauss-Seidel relaxations
 %%%%
 str_title=sprintf('tau = %.1f, alpha = %.2f',tau, alpha);
 ee = exp(i.*theta);
 eep= conj(ee);
 csin= alpha.*complex(alpha, (tau/2).*imag(ee));
 G=zeros(n1,n2);
 for ii=1:n1
     for jj=1:n2
         num = ee(ii) + csin(ii)*ee(jj);
         G(ii,jj) = num / (2*(1+alpha^2) - conj(num));
     end
 end
 
 
 figure
 hold off
 G0=(ee+csin)./(2*(1+alpha^2)-(eep+conj(csin)));
 plot(theta, abs(G(:,1)), 'r', 'LineWidth', 2)
 hold on
 plot(theta, abs(G0), 'g', 'LineWidth', 2)
 for jj=1:20:n2
     plot(theta, abs(G(:,jj)), 'b')
 end
 xlabel('\theta_1'); ylabel('Amplification Factor for Gauss-Seidel')
 title(str_title)
 
 % $$$ figure
 % $$$ mesh(x,y,abs(G))
 % $$$ xlabel('\theta_1'); ylabel('\theta_2')
 % $$$ title(str_title);
 % $$$ view(-120,25)
 
 max(max(abs(G)))
diff --git a/matlab/fourier_jac.m b/matlab/fourier_jac.m
index 4e9da71..ae518ab 100644
--- a/matlab/fourier_jac.m
+++ b/matlab/fourier_jac.m
@@ -1,69 +1,69 @@
 %
 % @file fourier_jac.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 clear all
 omega=0.8;
 tau=-2;
 alpha=1;
 
 c=2*omega/(1+alpha^2);
 str_title=sprintf('omega =  %.1f, tau = %.1f, alpha = %.2f', omega, ...
                   tau, alpha) 
 
 theta1=-pi:0.01:pi;
 theta2=-pi:0.01:pi;
 [x,y] = meshgrid(theta1,theta2);
 
 n1=length(theta1);
 n2=length(theta2);
 
 %%%%
 %%%% Damped Jacobi relaxations
 %%%%
 G=zeros(n1,n2);
 for ii=1:n1
     for jj=1:n2
         G(ii,jj) = 1-c.*( sin(theta1(ii)/2)^2 + alpha^2*sin(theta2(jj)/2)^2 ...
                  + 0.25*alpha*tau*sin(theta1(ii))*sin(theta2(jj)) );
     end
 end
 
 figure
 hold off
 G0 = 1-c.*sin(theta1./2).^2;
 plot(theta1, G(:,1), 'r', 'LineWidth', 2)
 hold on
 plot(theta1, G0, 'g', 'LineWidth', 2)
 for jj=1:20:n2
     plot(theta1, G(:,jj), 'b')
 end
 xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi')
 title(str_title)
 % $$$ figure
 % $$$ mesh(x,y,G)
 % $$$ xlabel('\theta_1'); ylabel('\theta_2')
 % $$$ title(str_title);
 
 max(max(abs(G)))
diff --git a/matlab/fourier_smooth.m b/matlab/fourier_smooth.m
index 45e8a7d..ed3ddd8 100644
--- a/matlab/fourier_smooth.m
+++ b/matlab/fourier_smooth.m
@@ -1,106 +1,106 @@
 %
 % @file fourier_smooth.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 %   Brute force computation of \mu = MAX |G| for
 %   damped Jacobi and Gauss-Seidel relaxation
 %
 %   Find optimal damping factor \omega for Jacobi relaxations
 %
 if ~exist('tau','var'), tau=0; end
 if ~exist('alpha','var'), alpha=1.0; end
 dth=0.01;
 
 theta1=-pi+dth:dth:pi;
 theta2=-pi+dth:dth:pi;
 nth1=length(theta1);
 nth2=length(theta2);
 
 sint12=sin(theta1./2).^2;
 sint22=sin(theta2./2).^2;
 
 sint1=sin(theta1);
 sint2=sin(theta2);
 [S1,S2]=meshgrid(sint1,sint2);
 ctau=0.25*alpha*tau;
 
 omega=0.5:0.002:1;
 n=length(omega);
 G = zeros(nth1,nth2);
 for i=1:n
   c=2*omega(i)/(1+alpha^2);
   for i1=1:nth1
       for i2=1:nth2
           if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2);
               G(i1,i2) = abs(1-c*(sint12(i1)+alpha^2*sint22(i2) + ...
                                  ctau*sint1(i1)*sint2(i2)));
           end
       end
   end
   [gmax,imax]=max(G);
   [mu(i),jmax]=max(gmax);
   theta1_opt(i) = theta1(imax(jmax));
   theta2_opt(i) = theta2(jmax);
 end
 [mu_min,i_min]=min(mu);
 omega_opt=omega(i_min);
 str_title=sprintf(['omega = %.3f, mu = %.3f, alpha = %.2f, tau = ' ...
 '%.1f'], omega_opt, mu_min, alpha, tau);
 
 figure
 subplot(211)
 plot(omega,mu,'LineWidth',2);
 xlabel('\omega'); ylabel('\mu')
 grid on
 title(str_title);
 subplot(212)
 plot(omega, theta1_opt, omega, theta2_opt,'LineWidth',2);
 legend('\theta_{1opt}', '\theta_{2opt}')
 xlabel('\omega'); ylabel('optimum \theta')
 grid on
 %
 %   \mu for Gauss-Seidel relaxation
 %
 Ggs = zeros(nth1,nth2);
 exp1=complex(cos(theta1),sin(theta1));
 exp2=complex(cos(theta2),sin(theta2));
 c=2*(1+alpha^2);
 ctau=complex(alpha^2, (0.5*alpha*tau).*sint1);
 for i1=1:nth1
     for i2=1:nth2
         if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2);
             Num = exp1(i1) + ctau(i1)*exp2(i2);
             Ggs(i1,i2) = abs( Num/(c-conj(Num)) );
         end
     end
 end
 [gmax,imax]=max(Ggs);
 [mugs,jmax]=max(gmax);
 theta1_gs_opt = theta1(imax(jmax));
 theta2_gs_opt = theta2(jmax);
 fprintf('alpha = %.2f, tau = %.1f, Ggs = %.4f, theta1 = %.4f, theta2 = %.4f\n', alpha, tau, ...
         mugs, theta1_gs_opt, theta2_gs_opt);
 subplot(211)
 hold on
 plot(omega,repmat(mugs,1,length(omega)),'r--','LineWidth',2)
diff --git a/matlab/gb_mat.m b/matlab/gb_mat.m
index a15b31d..64440af 100644
--- a/matlab/gb_mat.m
+++ b/matlab/gb_mat.m
@@ -1,43 +1,43 @@
 %
 % @file gb_mat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [mata] = gb_mat(file, dset)
 rank=h5readatt(file, dset, 'RANK');
 ku=h5readatt(file, dset, 'KU');
 kl=h5readatt(file, dset, 'KL');
 gbmat=h5read(file, dset);
 m=rank; n=rank;
 mata = zeros(m,n);
 for i=1:m
     jmin = max(1,i-kl);
     jmax = min(n,i+ku);
     for j=jmin:jmax
         ib = kl+ku+i-j+1;
         mata(i,j)=gbmat(ib,j);
     end
 end
 clear gbmat;
 
     
\ No newline at end of file
diff --git a/matlab/gbmat.m b/matlab/gbmat.m
index 3bb7c5e..dc14e74 100644
--- a/matlab/gbmat.m
+++ b/matlab/gbmat.m
@@ -1,60 +1,60 @@
 %
 % @file gbmat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde2d.h5';
 %
 %  Attributes of GB matrix
 %
 NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
 NIDBAS1=hdf5read(file,'/NIDBAS1');
 NIDBAS2=hdf5read(file,'/NIDBAS2');
 
 %mat='/MAT1';
 rank=hdf5read(file,strcat(mat,'/RANK'));
 ku=hdf5read(file,strcat(mat,'/KU'));
 kl=hdf5read(file,strcat(mat,'/KL'));
 gb_mat=hdf5read(file,mat);
 rhs0=hdf5read(file,'/RHS');
 sol0=hdf5read(file,'/SOL');
 %
 m=rank; n=rank;
 a = zeros(m,n);
 for i=1:m
     jmin = max(1,i-kl);
     jmax = min(n,i+ku);
     for j=jmin:jmax
         ib = kl+ku+i-j+1;
         a(i,j)=gb_mat(ib,j);
     end
 end
 %clear gb_mat;
 S = sparse(a);
 %clear a;
 figure
 spy(S);
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), rank = %d', NR, NTH, ...
               NIDBAS1, NIDBAS2, rank);
 title(LABEL)
 
diff --git a/matlab/gs_fd.m b/matlab/gs_fd.m
index 8e8c1b6..e812c93 100644
--- a/matlab/gs_fd.m
+++ b/matlab/gs_fd.m
@@ -1,66 +1,66 @@
 %
 % @file gs_fd.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 clear all
 
 N=1024; 
 L=100;
 n0=1;
 Narr=[128 256 512 1024];
 
 for i=1:length(Narr);
     N=Narr(i);
     h=L/N; h2=h^2;
     title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0);
     v0=-2.+n0*h2;
     u0=1.;
     dom(i)= 2*abs(u0)/abs(v0);
     v=v0.*ones(N,1);
     u=u0*ones(N-1,1);
     mata=diag(u,1) + diag(u,-1) + diag(v);
     matl= tril(mata,0); % D+L
     lambda = eig(-triu(mata,1),matl);
     rho(i)=max(abs(lambda));
     
 % $$$     figure
 % $$$     plot(lambda,'o')
 % $$$     xlabel('Real eigenvalue'), ylabel('Imag eigenvalue')
 % $$$     grid on
 % $$$     axis equal
 % $$$     title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ...
 % $$$                       n0, rho(i),dom(i));
 % $$$     title(title_str);
     fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ...
             rho(i),dom(i))
 end
 
 figure
 plot(Narr, rho,'o-')
 xlabel('N'); ylabel('GS spectral radius')
 title_str=sprintf(['FD scheme, L=%.1f, n0=%.1f'], L, n0);
 title(title_str);
 grid on
 
 
diff --git a/matlab/gs_fe.m b/matlab/gs_fe.m
index 7dfaa33..9f77cec 100644
--- a/matlab/gs_fe.m
+++ b/matlab/gs_fe.m
@@ -1,66 +1,66 @@
 %
 % @file gs_fe.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 clear all
 
 N=1024; 
 L=100;
 n0=1;
 Narr=[128 256 512 1024];
 
 for i=1:length(Narr);
     N=Narr(i);
     h=L/N; h2=h^2;
     title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0);
     v0=-2.+2.0*n0*h2/6.0;
     u0=1.+n0*h2/6.0;
     dom(i)= 2*abs(u0)/abs(v0);
     v=v0.*ones(N,1);
     u=u0*ones(N-1,1);
     mata=diag(u,1) + diag(u,-1) + diag(v);
     matl= tril(mata,0); % D+L
     lambda = eig(-triu(mata,1),matl);
     rho(i)=max(abs(lambda));
     
 % $$$     figure
 % $$$     plot(lambda,'o')
 % $$$     xlabel('Real eigenvalue'), ylabel('Imag eigenvalue')
 % $$$     grid on
 % $$$     axis equal
 % $$$     title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ...
 % $$$                       n0, rho(i),dom(i));
 % $$$     title(title_str);
     fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ...
             rho(i),dom(i))
 end
 
 figure
 plot(Narr, rho,'o-')
 xlabel('N'); ylabel('GS spectral radius')
 title_str=sprintf(['FE scheme, L=%.1f, n0=%.1f'], L, n0);
 title(title_str);
 grid on
 
 
diff --git a/matlab/h5Complex.m b/matlab/h5Complex.m
index ab338f9..c2f8774 100644
--- a/matlab/h5Complex.m
+++ b/matlab/h5Complex.m
@@ -1,50 +1,50 @@
 %
 % @file h5Complex.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function z = h5Complex(file, dset)
     data = hdf5read(file, dset);
     dims = size(data);
     rank = size(dims,2);
     switch rank
       case {1}
         for i=1:dims(1)
               z(i)=complex(cell2mat(data(i,1).Data(1)), cell2mat(data(i,1).Data(2)));
         end
       case {2}
         for i=1:dims(1)
             for j=1:dims(2)
                 z(i,j)=complex(cell2mat(data(i,j).Data(1)), cell2mat(data(i,j).Data(2)));
             end
         end
       case {3}
         for i=1:dims(1)
             for j=1:dims(2)
                 for k=1:dims(3)
                     z(i,j,k)=complex(cell2mat(data(i,j,k).Data(1)), cell2mat(data(i,j,k).Data(2)));
                 end
             end
         
         end
     end
diff --git a/matlab/h5Complex_ll.m b/matlab/h5Complex_ll.m
index ad43007..de9bf9d 100644
--- a/matlab/h5Complex_ll.m
+++ b/matlab/h5Complex_ll.m
@@ -1,33 +1,33 @@
 %
 % @file h5Complex_ll.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function z = h5Complex_ll(file, dset)
     fid=H5F.open(file, 'H5F_ACC_RDONLY', 'H5P_DEFAULT');
     dset_id=H5D.open(fid, dset);
     dxpl = 'H5P_DEFAULT';
     data = H5D.read(dset_id,'H5ML_DEFAULT','H5S_ALL','H5S_ALL', dxpl);
     z = complex(data.real, data.imaginary);
     H5D.close(dset_id);
     H5F.close(fid);
diff --git a/matlab/jac_opt.m b/matlab/jac_opt.m
index 82d6f70..04b2c09 100644
--- a/matlab/jac_opt.m
+++ b/matlab/jac_opt.m
@@ -1,78 +1,78 @@
 %
 % @file jac_opt.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 %
 %   Find optimal damping factor \omega for Jacobi relaxations
 %
 clear all
 alpha=0.5;
 
 theta2=pi/2:0.01:pi;
 theta1=0:0.01:pi;
 [x,y]=meshgrid(theta1,theta1);
 
 sint12=sin(theta1./2).^2;
 sint22=sin(theta2./2).^2;
 [ksi,eta]=meshgrid(sint12,sint22);
 
 nth1=length(theta1);
 nth2=length(theta2);
 
 omega=0.1:0.01:1;
 n=length(omega);
 for i=1:n
   c=2*omega(i)/(1+alpha^2);
   G=abs(1 - c.*(ksi+(alpha^2).*eta ));
   [gmax,imax]=max(G);
   [mu(i),jmax]=max(gmax);
   eta_opt(i)=eta(imax(jmax),jmax);
   ksi_opt(i)=ksi(imax(jmax),jmax);
 end
 [mu_min,i_min]=min(mu);
 omega_opt=omega(i_min);
 str_title=sprintf('omega = %.3f, mu = %.3f, alpha = %.2f', omega_opt, ...
                   mu_min, alpha); 
 
 figure
 plot(omega,mu,'o-', omega, ksi_opt, '*-', omega, eta_opt, '^-');
 legend('\mu', '\xi_{opt}', '\eta_{opt}')
 xlabel('\omega'); ylabel('\mu')
 grid on
 title(str_title);
 
 % $$$ 
 % $$$ c=2*omega_opt/(1+alpha^2);
 % $$$ G=1-c.*(ksi+(alpha^2).*eta);
 % $$$ figure
 % $$$ hold off
 % $$$ plot(theta1, G(1,:), 'r', 'LineWidth', 2)
 % $$$ hold on
 % $$$ plot(theta1, G(nth2,:), 'g', 'LineWidth', 2)
 % $$$ for jj=1:20:nth2
 % $$$     plot(theta1, G(jj,:), 'b')
 % $$$ end
 % $$$ xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi')
 % $$$ title(str_title)
 % $$$ grid on
diff --git a/matlab/modes.m b/matlab/modes.m
index ccb5c06..0dd4521 100644
--- a/matlab/modes.m
+++ b/matlab/modes.m
@@ -1,84 +1,84 @@
 %
 % @file modes.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 N=8;
 kmodes=N-1;
 
 %%  modes on fine grid %%
 xh=(1/N).*(0:1:N);
 wh=zeros(N+1,kmodes);
 for k=1:kmodes
     wh(:,k) = sin((k*pi).*xh);
 end
 
 %%  Restriction %%
 R=[
 1 0 0 0 0 0 0 0 0
 0 0.5 1 0.5 0 0 0 0 0
 0 0 0 0.5 1 0.5 0 0 0
 0 0 0 0 0 0.5 1 0.5 0
 0 0 0 0 0 0 0 0 1
 ];
 
 %% Null space of Restriction %%
 ns = [
      0     0     0     0
      2     0     0     0
     -1    -1     0     0
      0     2     0     0
      0    -1    -1     0
      0     0     2     0
      0     0    -1    -1
      0     0     0     2
      0     0     0     0];
 
 figure
 subplot(211)
 plot(ns,'o-');
 title('Basis of Null space of Restriction')
 subplot(212)
 plot(R','o-');
 title('Basis of Range of Prolongation')
 
 
 %%  modes on coarse grid %%
 N2h = N/2;
 x2h=(1/N2h).*(0:1:N2h);
 w2h = R*wh;
 
 x=0:0.01:1.;
 figure
 for k=1:kmodes
     subplot(3,3,k)
     plot(xh,wh(:,k),'o', x, sin((k*pi).*x),'b-', x2h, w2h(:,k), 'ro-');
     grid on
 end
 
 figure
 for k=1:N/2
     subplot(2,2,k)
     plot(xh,wh(:,k),'o-', xh,wh(:,N-k),'r*-')
     grid on
 end
diff --git a/matlab/pde1d.m b/matlab/pde1d.m
index 86b6642..3f67aa7 100644
--- a/matlab/pde1d.m
+++ b/matlab/pde1d.m
@@ -1,72 +1,72 @@
 %
 % @file pde1d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1d.h5';
 fprintf(1,'NX     = %d\n', hdf5read(file,'/NX'));
 fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS'));
 fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS'));
 fprintf(1,'KDIFF  = %d\n', hdf5read(file,'/KDIFF'));
 
 x = hdf5read(file, '/XGRID');
 f= hdf5read(file, '/SOLCAL');
 fexact= hdf5read(file, '/SOLANA');
 err=hdf5read(file, '/ERR'); 
 f=f';
 fexact=fexact';
 err=err';
 
 figure
 subplot(311)
 plot(x,f(1,:),'o',x,fexact(1,:))
 xlabel('X')
 ylabel('Function')
 grid on
 subplot(312)
 plot(x,f(2,:),'o',x,fexact(2,:))
 xlabel('X')
 ylabel('1st Derivative')
 grid on
 subplot(313)
 plot(x,f(3,:),'o',x,fexact(3,:))
 xlabel('X')
 ylabel('2nd Derivative')
 grid on
 
 figure
 subplot(311)
 plot(x,err(1,:),'o-')
 xlabel('X');
 ylabel('Errors on function');
 grid on
 subplot(312)
 plot(x,err(2,:),'o-')
 xlabel('X');
 ylabel('Errors on 1st derivative');
 grid on
 subplot(313)
 plot(x,err(3,:),'o-')
 xlabel('X');
 ylabel('Errors on 2nd derivative');
 grid on
diff --git a/matlab/pde1d_eig.m b/matlab/pde1d_eig.m
index 8b39b43..70b5434 100644
--- a/matlab/pde1d_eig.m
+++ b/matlab/pde1d_eig.m
@@ -1,30 +1,30 @@
 %
 % @file pde1d_eig.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1d_eig.h5';
 
 %%% get sparse matrix and its diagonal elelments in dig %%%
 [mata,diag]=zcsr_mat(file,'/MAT');
 spy(mata,12);
diff --git a/matlab/pde1d_eig_zcsr.m b/matlab/pde1d_eig_zcsr.m
index c6fa3e7..5cbe4d1 100644
--- a/matlab/pde1d_eig_zcsr.m
+++ b/matlab/pde1d_eig_zcsr.m
@@ -1,30 +1,30 @@
 %
 % @file pde1d_eig_zcsr.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1d_eig.h5';
 
 %%% get sparse matrix and its diagonal elelments in dig %%%
 [mata,diag]=zcsr_mat(file,'/MAT');
 spy(mata,12);
diff --git a/matlab/pde1d_eig_zmumps.m b/matlab/pde1d_eig_zmumps.m
index 2a431bd..b9c721c 100644
--- a/matlab/pde1d_eig_zmumps.m
+++ b/matlab/pde1d_eig_zmumps.m
@@ -1,55 +1,55 @@
 %
 % @file pde1d_eig_zmumps.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1d_eig_zmumps.h5';
 
 %%% Read mumps matrix and convert to Matlab sparse format
 [mata,diag_ele]=zmumps_mat(file,'/MAT');
 n=size(mata,1);
 
 %spy(mata,12);
 arpack_vals=h5Complex_ll(file,'/eig_vals');
 nev=size(arpack_vals,1);
 arpack_vecs = h5Complex_ll(file, '/eig_vecs');
 
 %%% Compute eigen values and vectors, using EIGS
 [V,D,FLAG]=eigs(mata,nev,'SM');
 [eigs_vals,perm]=sort(diag(D));
 eigs_vecs=V(:,perm);
 
 fprintf('Eigenvalues from Arpack and Matlab eigs\n');
 for i=1:nev
    fprintf('%i (%.5e %.5e), %.5e\n',i,real(arpack_vals(i)),imag(arpack_vals(i)),eigs_vals(i));
 end
 fprintf('Norm of difference %.3e\n', norm(arpack_vals-eigs_vals,Inf));
 
 %%% Renormalize EIGS %%%
 fprintf('\n\nDiff of Eigenvectors from Arpack and Matlab eigs\n');
 for i=1:nev
     nrm=arpack_vecs(1,i);
     eigs_vecs(:,i) = (eigs_vecs(:,i)./eigs_vecs(1,i)).*nrm;
     diff_vecs = norm(arpack_vecs(:,i)-eigs_vecs(:,i),Inf);
     fprintf('%i %10.3e\n', i, diff_vecs);
 end
diff --git a/matlab/pde1dp.m b/matlab/pde1dp.m
index 97b7a6e..e464d62 100644
--- a/matlab/pde1dp.m
+++ b/matlab/pde1dp.m
@@ -1,41 +1,41 @@
 %
 % @file pde1dp.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1dp.h5';
 fprintf(1,'NX     = %d\n', hdf5read(file,'/NX'));
 fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS'));
 fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS'));
 
 mata=hdf5read(file,'/mata');
 
 xpts = hdf5read(file, '/rhs/x');
 frhs = hdf5read(file, '/rhs/f');
 
 figure
 plot(xpts,frhs);
 xlabel('X')
 ylabel('RHS')
 grid on
 
diff --git a/matlab/pde1dp_cmpl.m b/matlab/pde1dp_cmpl.m
index e07fc31..53f7348 100644
--- a/matlab/pde1dp_cmpl.m
+++ b/matlab/pde1dp_cmpl.m
@@ -1,49 +1,49 @@
 %
 % @file pde1dp_cmpl.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1dp_cmpl.h5';
 
 xgrid=hdf5read(file,'/xgrid');
 nx = size(xgrid)-1;
 
 rhs=h5Complex(file, '/rhs');
 sol=h5Complex(file, '/sol');
 %mat=h5Complex(file, '/mat');
 
 x=hdf5read(file,'/x');
 solana=h5Complex(file,'/solana');
 solcal=h5Complex(file,'/solcal');
 err=hdf5read(file, '/err');
 
 figure
 subplot(211)
 plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
      imag(solcal), '*')
 legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
 xlabel('X'); ylabel('SOL');
 subplot(212)
 plot(x, err, 'o-');
 xlabel('X'); ylabel('|Error|')
 grid on
diff --git a/matlab/pde1dp_cmpl_dft.m b/matlab/pde1dp_cmpl_dft.m
index 10aa61a..72c8b9d 100644
--- a/matlab/pde1dp_cmpl_dft.m
+++ b/matlab/pde1dp_cmpl_dft.m
@@ -1,70 +1,70 @@
 %
 % @file pde1dp_cmpl_dft.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1dp_cmpl_dft.h5';
 
 xgrid=hdf5read(file,'/xgrid');
 nx = size(xgrid)-1;
 mode=0:nx-1;
 
 rhs=h5Complex(file, '/rhs');
 sol=h5Complex(file, '/sol');
 rhs_fft=h5Complex(file, '/rhs_fft');
 sol_fft=h5Complex(file, '/sol_fft');
 mat=h5Complex(file, '/mat');
 
 x=hdf5read(file,'/x');
 solana=h5Complex(file,'/solana');
 solcal=h5Complex(file,'/solcal');
 err=hdf5read(file, '/err');
 
 figure
 subplot(211)
 plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
      imag(solcal), '*')
 legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
 xlabel('X'); ylabel('SOL');
 subplot(212)
 plot(x, err, 'o-');
 xlabel('X'); ylabel('|Error|')
 grid on
 
 figure
 subplot(311)
 plot(mode, real(mat), 'o', mode, imag(mat), '*')
 xlabel('mode'); ylabel('DFT of MAT')
 legend('Real', 'Imag')
 grid on
 
 subplot(312)
 plot(mode, real(rhs_fft), 'o', mode, imag(rhs_fft), '*')
 xlabel('mode'); ylabel('DFT of RHS')
 legend('Real', 'Imag')
 grid on
 subplot(313)
 plot(mode, real(sol_fft), 'o', mode, imag(sol_fft), '*')
 xlabel('mode'); ylabel('DFT of SOL')
 legend('Real', 'Imag')
 grid on
diff --git a/matlab/pde1dp_cmpl_pardiso.m b/matlab/pde1dp_cmpl_pardiso.m
index 1878e8e..bcc75b6 100644
--- a/matlab/pde1dp_cmpl_pardiso.m
+++ b/matlab/pde1dp_cmpl_pardiso.m
@@ -1,72 +1,72 @@
 %
 % @file pde1dp_cmpl_pardiso.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde1dp_cmpl_pardiso.h5';
 
 xgrid=hdf5read(file,'/xgrid');
 nx = size(xgrid)-1;
 
 rhs=h5Complex(file, '/rhs');
 sol=h5Complex(file, '/sol');
 
 cols=hdf5read(file, '/MAT/cols');
 irow=hdf5read(file, '/MAT/irow');
 val=h5Complex(file, '/MAT/val');
 perm=hdf5read(file, '/MAT/perm');
 
 cols=double(cols);
 irow=double(irow);
 perm=double(perm);
 n = size(perm,1);
 nnz=size(val,1);
 
 rows = zeros(nnz,1);
 for i=1:n
     s = irow(i);
     e = irow(i+1)-1;
     rows(s:e) = i;
 end
 mat = sparse(rows,cols,val);
 figure
 subplot(121);
 spy(mat);
 subplot(122);
 spy(mat(perm,perm));
 
 x=hdf5read(file,'/x');
 solana=h5Complex(file,'/solana');
 solcal=h5Complex(file,'/solcal');
 err=hdf5read(file, '/err');
 
 figure
 subplot(211)
 plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
      imag(solcal), '*')
 legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
 xlabel('X'); ylabel('SOL');
 subplot(212)
 plot(x, err, 'o-');
 xlabel('X'); ylabel('|Error|')
 grid on
diff --git a/matlab/pde2d.m b/matlab/pde2d.m
index c375cef..e3ee9fe 100644
--- a/matlab/pde2d.m
+++ b/matlab/pde2d.m
@@ -1,85 +1,85 @@
 %
 % @file pde2d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde2d.h5';
 %
 %     Get data from data sets
 %
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 solr=hdf5read(file,'/derivx')';
 solt=hdf5read(file,'/derivy')';
 %
 %     Attributes
 %
 NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
 NIDBAS1=hdf5read(file,'/NIDBAS1');
 NIDBAS2=hdf5read(file,'/NIDBAS2');
 MBESS=hdf5read(file,'/MBESS');
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
               NIDBAS1, NIDBAS2, MBESS);
 
 attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
 attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
 attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
 attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
 attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 solx = cos(T).*solr - sin(T)./R.*solt;
 soly = sin(T).*solr + cos(T)./R.*solt;
 
 figure
 subplot(221)
 pcolor(double(r),double(t),double(sol));
 shading interp
 hold on, quiver(r,t,solr,solt)
 xlabel(x_ann); ylabel(y_ann)
 title(LABEL)
 colorbar
 
 subplot(222)
 pcolor(double(x),double(y),double(sol))
 shading interp
 hold on, quiver(x,y,solx,soly)
 hold off, axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(double(x),double(y),double(sol))
 xlabel('X'); ylabel('Y');
 title(sol_ann)
 
 subplot(224)
 surfc(double(x),double(y),double(err))
 xlabel('X'); ylabel('Y');
 title(err_ann)
 
diff --git a/matlab/pde2d_mumps.m b/matlab/pde2d_mumps.m
index 268f2ed..62950d8 100644
--- a/matlab/pde2d_mumps.m
+++ b/matlab/pde2d_mumps.m
@@ -1,97 +1,97 @@
 %
 % @file pde2d_mumps.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde2d_mumps.h5';
 
 nr=hdf5read(file,'/', 'NX'); 
 nth=hdf5read(file,'/', 'NY');
 NIDBAS1=hdf5read(file,'/','NIDBAS1');
 NIDBAS2=hdf5read(file,'/','NIDBAS2');
 MBESS=hdf5read(file,'/','MBESS');
 
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 solr=hdf5read(file,'/derivx')';
 solt=hdf5read(file,'/derivy')';
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', nr, nth, ...
               NIDBAS1, NIDBAS2, MBESS);
 
 figure
 subplot(211)
 plot(r, sol(nth/2,:), 'o', r, solexact(nth/2,:))
 xlabel('r')
 grid on
 title(LABEL)
 subplot(212)
 if MBESS == 0
     plot(t, sol(:,1), 'o', t, solexact(:,1))
 else
     plot(t, sol(:,nr/2), 'o', t, solexact(:,nr/2))
 end
 xlabel('\theta')
 grid on
 
 
 
 
 % $$$ if verLessThan('matlab', '7.9');
 % $$$     n = hdf5read(file,'/MAT/RANK');
 % $$$     nnz = hdf5read(file,'/MAT/NNZ');
 % $$$     nlsym = hdf5read(file,'/MAT/NLSYM');
 % $$$ else
 % $$$     n = hdf5read(file,'/MAT/', 'RANK');
 % $$$     nnz = hdf5read(file,'/MAT/', 'NNZ');
 % $$$     nlsym = hdf5read(file,'/MAT/', 'NLSYM');
 % $$$ end
 % $$$ 
 % $$$ cols=hdf5read(file, '/MAT/cols');
 % $$$ irow=hdf5read(file, '/MAT/irow');
 % $$$ val=hdf5read(file, '/MAT/val');
 % $$$ perm=hdf5read(file, '/MAT/perm');
 % $$$ 
 % $$$ rows = zeros(nnz,1);
 % $$$ cols=double(cols);
 % $$$ irow=double(irow);
 % $$$ perm=double(perm);
 % $$$ 
 % $$$ for i=1:n
 % $$$     s = irow(i);
 % $$$     e = irow(i+1)-1;
 % $$$     rows(s:e) = i;    
 % $$$ end
 % $$$ 
 % $$$ mat = sparse(rows,cols,val);
 % $$$ figure
 % $$$ subplot(121);
 % $$$ spy(mat(perm,perm));
 % $$$ title('Matrix structure')
 % $$$ subplot(122);
 % $$$ spy(chol(mat(perm,perm)));
 % $$$ title('Factor L^T')
 
 
diff --git a/matlab/pde2d_nh.m b/matlab/pde2d_nh.m
index cd12593..b60a4fe 100644
--- a/matlab/pde2d_nh.m
+++ b/matlab/pde2d_nh.m
@@ -1,101 +1,101 @@
 %
 % @file pde2d_nh.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde2d_nh.h5';
 %
 %     Get data from data sets
 %
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 errx=hdf5read(file,'/errors_x')';
 erry=hdf5read(file,'/errors_y')';
 solr=hdf5read(file,'/derivx')';
 solt=hdf5read(file,'/derivy')';
 %
 %     Attributes
 %
 NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
 NIDBAS1=hdf5read(file,'/NIDBAS1');
 NIDBAS2=hdf5read(file,'/NIDBAS2');
 MBESS=hdf5read(file,'/MBESS');
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
               NIDBAS1, NIDBAS2, MBESS);
 
 attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
 attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
 attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
 attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
 attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 solx = cos(T).*solr - sin(T)./R.*solt;
 soly = sin(T).*solr + cos(T)./R.*solt;
 
 figure
 subplot(221)
 pcolor(double(r),double(t),double(sol));
 shading interp
 hold on, quiver(r,t,solr,solt)
 xlabel(x_ann); ylabel(y_ann)
 title(LABEL)
 colorbar
 
 subplot(222)
 pcolor(double(x),double(y),double(sol))
 shading interp
 hold on, quiver(x,y,solx,soly)
 hold off, axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(double(x),double(y),double(sol))
 xlabel('X'); ylabel('Y');
 title(sol_ann)
 
 subplot(224)
 surfc(double(x),double(y),double(err))
 xlabel('X'); ylabel('Y');
 title(err_ann)
 
 figure
 subplot(311)
 plot(t,err(:,NR+1),'o-')
 xlabel('\theta'); ylabel('Error on solution')
 grid on
 title('Error on Boundary r=1');
 subplot(312)
 plot(t,errx(:,NR+1),'o-')
 xlabel('\theta'); ylabel('Error on x-derivative')
 grid on
 subplot(313)
 plot(t,erry(:,NR+1),'o-')
 xlabel('\theta'); ylabel('Error on y-derivative')
 grid on
diff --git a/matlab/pde2d_pardiso.m b/matlab/pde2d_pardiso.m
index 1ec3ae3..5b4604c 100644
--- a/matlab/pde2d_pardiso.m
+++ b/matlab/pde2d_pardiso.m
@@ -1,63 +1,63 @@
 %
 % @file pde2d_pardiso.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde2d_pardiso.h5';
 
 if verLessThan('matlab', '7.9');
     n = hdf5read(file,'/MAT/RANK');
     nnz = hdf5read(file,'/MAT/NNZ');
     nlsym = hdf5read(file,'/MAT/NLSYM');
 else
     n = hdf5read(file,'/MAT/', 'RANK');
     nnz = hdf5read(file,'/MAT/', 'NNZ');
     nlsym = hdf5read(file,'/MAT/', 'NLSYM');
 end
 
 cols=hdf5read(file, '/MAT/cols');
 irow=hdf5read(file, '/MAT/irow');
 val=hdf5read(file, '/MAT/val');
 perm=hdf5read(file, '/MAT/perm');
 
 rows = zeros(nnz,1);
 cols=double(cols);
 irow=double(irow);
 perm=double(perm);
 
 for i=1:n
     s = irow(i);
     e = irow(i+1)-1;
     rows(s:e) = i;    
 end
 
 mat = sparse(rows,cols,val);
 figure
 subplot(121);
 spy(mat(perm,perm));
 title('Matrix structure')
 subplot(122);
 spy(chol(mat(perm,perm)));
 title('Factor L^T')
 
 
diff --git a/matlab/pde2d_sym_pardiso.m b/matlab/pde2d_sym_pardiso.m
index 7eb2c8f..b23f0a7 100644
--- a/matlab/pde2d_sym_pardiso.m
+++ b/matlab/pde2d_sym_pardiso.m
@@ -1,136 +1,136 @@
 %
 % @file pde2d_sym_pardiso.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde2d_sym_pardiso.h5';
 %
 %     Get data from data sets
 %
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 solr=hdf5read(file,'/derivx')';
 solt=hdf5read(file,'/derivy')';
 %
 %     Attributes
 %
 if verLessThan('matlab', '7.9');
     NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
     NIDBAS1=hdf5read(file,'/NIDBAS1');
     NIDBAS2=hdf5read(file,'/NIDBAS2');
     MBESS=hdf5read(file,'/MBESS');
     EPSI=hdf5read(file,'/EPSI');
     attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
     attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
     attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
     attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
     attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
 else
     NR=hdf5read(file,'/','NX'); NTH=hdf5read(file,'/','NY');
     NIDBAS1=hdf5read(file,'/','NIDBAS1');
     NIDBAS2=hdf5read(file,'/','NIDBAS2');
     MBESS=hdf5read(file,'/','MBESS');
     EPSI=hdf5read(file,'/','EPSI');
     attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data;
     attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data;
     attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data;
     attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data;
     attr=hdf5read(file,'/errors/','title');err_ann=attr.Data;
 end
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', ...
               NR, NTH, NIDBAS1, NIDBAS2, MBESS, EPSI);
 
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 solx = cos(T).*solr - sin(T)./R.*solt;
 soly = sin(T).*solr + cos(T)./R.*solt;
 
 figure
 subplot(221)
 pcolor(double(r),double(t),double(sol));
 shading interp
 hold on, quiver(r,t,solr,solt)
 xlabel(x_ann); ylabel(y_ann)
 title(LABEL)
 colorbar
 
 subplot(222)
 pcolor(double(x),double(y),double(sol))
 shading interp
 hold on, quiver(x,y,solx,soly)
 hold off, axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(double(x),double(y),double(sol))
 xlabel('X'); ylabel('Y');
 title(sol_ann)
 
 subplot(224)
 surfc(double(x),double(y),double(err))
 xlabel('X'); ylabel('Y');
 title(err_ann)
 
 
 
 if verLessThan('matlab', '7.9');
     n = hdf5read(file,'/MAT/RANK');
     nnz = hdf5read(file,'/MAT/NNZ');
     nlsym = hdf5read(file,'/MAT/NLSYM');
 else
     n = hdf5read(file,'/MAT/', 'RANK');
     nnz = hdf5read(file,'/MAT/', 'NNZ');
     nlsym = hdf5read(file,'/MAT/', 'NLSYM');
 end
 
 cols=hdf5read(file, '/MAT/cols');
 irow=hdf5read(file, '/MAT/irow');
 val=hdf5read(file, '/MAT/val');
 perm=hdf5read(file, '/MAT/perm');
 
 rows = zeros(nnz,1);
 cols=double(cols);
 irow=double(irow);
 perm=double(perm);
 
 for i=1:n
     s = irow(i);
     e = irow(i+1)-1;
     rows(s:e) = i;    
 end
 
 mat = sparse(rows,cols,val);
 figure
    subplot(121)
    spy(mat)
    title('Original Matrix structure')
    subplot(122)
    spy(mat(perm,perm))
    title('Permuted Matrix structure')
 
diff --git a/matlab/pde2d_sym_pardiso_dft.m b/matlab/pde2d_sym_pardiso_dft.m
index 5b0f948..b5103b7 100644
--- a/matlab/pde2d_sym_pardiso_dft.m
+++ b/matlab/pde2d_sym_pardiso_dft.m
@@ -1,171 +1,171 @@
 %
 % @file pde2d_sym_pardiso_dft.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 mat_disp=1;
 
 file='pde2d_sym_pardiso_dft.h5';
 
 if verLessThan('matlab', '7.9');
     n = hdf5read(file,'/MAT1/RANK');
     nnz = hdf5read(file,'/MAT1/NNZ');
     nlsym = hdf5read(file,'/MAT1/NLSYM');
     NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
     NIDBAS1=hdf5read(file,'/NIDBAS1');
     NIDBAS2=hdf5read(file,'/NIDBAS2');
     MBESS=hdf5read(file,'/MBESS');
     EPSI=hdf5read(file,'/EPSI');
     KMIN=hdf5read(file,'/KMIN');
     KMAX=hdf5read(file,'/KMAX');
 else
     n = hdf5read(file,'/MAT1/', 'RANK');
     nnz = hdf5read(file,'/MAT1/', 'NNZ');
     nlsym = hdf5read(file,'/MAT1/', 'NLSYM');
     NR=hdf5read(file,'/', 'NX'); NTH=hdf5read(file,'/', 'NY');
     NIDBAS1=hdf5read(file,'/', 'NIDBAS1');
     NIDBAS2=hdf5read(file,'/', 'NIDBAS2');
     MBESS=hdf5read(file,'/', 'MBESS');
     EPSI=hdf5read(file,'/','EPSI');
     KMIN=hdf5read(file,'/','KMIN');
     KMAX=hdf5read(file,'/','KMAX');
 end
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', NR, NTH, ...
               NIDBAS1, NIDBAS2, MBESS, EPSI);
 DK = KMAX-KMIN+1;
 
 attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data;
 attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data;
 attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data;
 attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data;
 attr=hdf5read(file,'/errors/','title');err_ann=attr.Data;
 
 if mat_disp == 1
     cols=hdf5read(file, '/MAT1/cols');
     irow=hdf5read(file, '/MAT1/irow');
     val=h5Complex(file, '/MAT1/val');
     perm=hdf5read(file, '/MAT1/perm');
 
     rows = zeros(nnz,1);
     cols=double(cols);
     irow=double(irow);
     perm=double(perm);
 
     for i=1:n
         s = irow(i);
         e = irow(i+1)-1;
         rows(s:e) = i;    
     end
 
     valr=real(val); vali=imag(val);
     mat = sparse(rows,cols,valr);
     figure
     subplot(121)
     spy(mat,8)
     title('Original Matrix structure')
     subplot(122)
     spy(mat(perm,perm),8)
     title('Permuted Matrix structure')
 end
 
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 solr=hdf5read(file,'/derivx')';
 solt=hdf5read(file,'/derivy')';
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 solx = cos(T).*solr - sin(T)./R.*solt;
 soly = sin(T).*solr + cos(T)./R.*solt;
 
 figure
 subplot(221)
 pcolor(r,t,sol);
 shading interp
 hold on, quiver(r,t,solr,solt)
 xlabel(x_ann); ylabel(y_ann)
 title(LABEL)
 colorbar
 
 subplot(222)
 pcolor(x,y,sol)
 shading interp
 hold on, quiver(x,y,solx,soly)
 hold off, axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(x,y,sol)
 xlabel('X'); ylabel('Y');
 title(sol_ann)
 
 subplot(224)
 surfc(x,y,err)
 xlabel('X'); ylabel('Y');
 title(err_ann)
 
 figure
 ft_sol=h5Complex(file,'/FT_SOL');
 ft_sol=reshape(ft_sol,DK,[]);
 m=[KMIN:KMAX]; sp=1:NR+NIDBAS1;
 subplot(121)
   stem3(sp,m, real(ft_sol), 'filled')
   shading interp
   xlabel('Radial spline number'); ylabel('m')
   title('Real(\phi)')
 subplot(122)
   stem3(sp,m, imag(ft_sol),'filled')
   shading interp
   xlabel('Radial spline number'); ylabel('m')
   title('Imag(\phi)')
 
 figure
 ft_rhs=h5Complex(file,'/FT_RHS');
 ft_rhs=reshape(ft_rhs,DK,[]);
 m=[KMIN:KMAX]; sp=1:NR+NIDBAS1;
 subplot(121)
   stem3(sp,m, real(ft_rhs), 'filled')
   shading interp
   xlabel('Radial spline number'); ylabel('m')
   title('Real(\rho)')
 subplot(122)
   stem3(sp,m, imag(ft_rhs),'filled')
   shading interp
   xlabel('Radial spline number'); ylabel('m')
   title('Imag(\rho)')
 
 figure
 energy_k = h5Complex(file,'/ENERGY_K');
 subplot(211)
 stem(m, real(energy_k));
 xlabel('m'); ylabel('Real(\phi)'); title('Spectral energy')
 subplot(212)
 stem(m, imag(energy_k));
 xlabel('m'); ylabel('Imag(\phi)');  title('Spectral energy')
 
   
diff --git a/matlab/pde3d.m b/matlab/pde3d.m
index f8cb948..17bb7d5 100644
--- a/matlab/pde3d.m
+++ b/matlab/pde3d.m
@@ -1,124 +1,124 @@
 %
 % @file pde3d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='pde3d.h5';
 %
 %     Get data from data sets
 %
 x=hdf5read(file,'/xgrid');
 y=hdf5read(file,'/ygrid');
 z=hdf5read(file,'/zgrid');
 %
 nx=size(x);
 ny=size(y);
 nz=size(z);
 %
 rhs=hdf5read(file,'/RHS');
 coefs=hdf5read(file,'/SOL');
 bcoef=hdf5read(file,'/BCOEF');
 sol=hdf5read(file,'/sol');
 solexact=hdf5read(file,'/solana');
 solx=hdf5read(file,'/derivx');
 soly=hdf5read(file,'/derivy');
 solz=hdf5read(file,'/derivz');
 solx_exact=hdf5read(file,'/derivx_exact');
 soly_exact=hdf5read(file,'/derivy_exact');
 solz_exact=hdf5read(file,'/derivz_exact');
 
 figure
 k=ceil(nz(1)/2);
 subplot(211);
 pcolor(x,y,sol(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated solution')
 colorbar
 
 subplot(212);
 pcolor(x,y,solexact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical solution')
 colorbar
 
 figure
 err=sol-solexact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 subplot(211);
 pcolor(x,y,solx(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dx')
 colorbar
 
 subplot(212);
 pcolor(x,y,solx_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dx')
 colorbar
 
 figure
 k=ceil(nz(1)/2);
 subplot(211);
 pcolor(x,y,soly(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dy')
 colorbar
 
 subplot(212);
 pcolor(x,y,soly_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dy')
 colorbar
 
 figure
 k=ceil(nz(1)/6);
 subplot(211);
 pcolor(x,y,solz(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dz')
 colorbar
 
 subplot(212);
 pcolor(x,y,solz_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dz')
 colorbar
 
 figure
 i=ceil(nx(1)/2);
 plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
 xlabel('z');
diff --git a/matlab/poisson_fe.m b/matlab/poisson_fe.m
index 92012f6..a12b08b 100644
--- a/matlab/poisson_fe.m
+++ b/matlab/poisson_fe.m
@@ -1,123 +1,123 @@
 %
 % @file poisson_fe.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='poisson_fe.h5';
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 relax=h5readatt(file,'/','RELAX');
 nlevels=h5readatt(file,'/','LEVELS');
 nu1=h5readatt(file,'/','NU1');
 nu2=h5readatt(file,'/','NU2');
 mu=h5readatt(file,'/','MU');
 title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
                    'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ...
                   nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky);
 
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=h5read(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=h5read(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 % $$$ figure
 % $$$ spy(mata)
 
 %
 %    Solutions at the finest grid
 %
 x=h5read(file,'/solutions/xg');
 y=h5read(file,'/solutions/yg');
 dense=h5read(file,'/solutions/dense');
 sol_anal=h5read(file,'/solutions/anal');
 sol_calc=h5read(file,'/solutions/calc');
 sol_direct=h5read(file,'/solutions/direct');
 nx=int32(size(x,1));
 ny=int32(size(y,1));
 
 figure
 surf(x,y,sol_direct'-sol_anal')
 xlabel('X'); ylabel('Y');
 title('Error on the finest grid')
 
 figure
 subplot(211)
 [yy,iy] = max(abs(sol_anal),[],2);
 [xx,ix] = max(yy);
 iy0=iy(ix);
 str=sprintf('Solution at y = %.4f', y(iy0));
 plot(x, sol_anal(:,iy0),x, sol_direct(:,iy0),'o')
 xlabel('x'); ylabel(str);
 grid on
 legend('Analytic Solution', 'Direct Solution')
 title(title_str)
 subplot(212)
 [xx,ix] = max(abs(sol_anal));
 [yy,iy] = max(xx);
 ix0=ix(iy);
 str=sprintf('Solution at x = %.4f', x(ix0));
 plot(y, sol_anal(ix0,:),y, sol_direct(ix0,:),'o')
 xlabel('y'); ylabel(str);
 grid on
 title(title_str)
 
 %
 %   Iterations
 %
 dset='/Iterations/';
 disc_err=h5read(file, strcat(dset,'disc_errors'));
 resid=h5read(file, strcat(dset,'residues'));
 its=0:1:size(resid,1)-1;
 figure
 subplot(211)
 semilogy(its,resid,'o-')
 grid on
 xlabel('Iterations'); ylabel('Norm of residue');
 title(title_str);
 subplot(212)
 semilogy(its,disc_err,'h-')
 grid on
 xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/poisson_mg.m b/matlab/poisson_mg.m
index e407dd6..10f23a3 100644
--- a/matlab/poisson_mg.m
+++ b/matlab/poisson_mg.m
@@ -1,140 +1,140 @@
 %
 % @file poisson_mg.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='poisson_mg.h5'; end
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 lx=h5readatt(file,'/','LX');
 ly=h5readatt(file,'/','LY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 beta=h5readatt(file,'/','BETA');
 omega=h5readatt(file,'/','OMEGA');
 relax=h5readatt(file,'/','RELAX');
 mat_type=h5readatt(file,'/','MAT_TYPE');
 nlevels=h5readatt(file,'/','LEVELS');
 mu=h5readatt(file,'/','MU');
 nnu=h5readatt(file,'/','NNU');
 nu1=h5read(file,'/nu1');
 nu2=h5read(file,'/nu2');
 title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, beta=%.4f, relax=%s, V(%d,%d), ' ...
                    'LEVELS=%d, KX=%d, KY=%d'], ...
                   nx,ny,lx,ly,beta,relax,nu1(nnu),nu2(nnu),nlevels,kx,ky);
 
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 if mat_type == 'csr'
     [mata,diag]=csr_mat(file,dset);
 else
     [mata,diag]=cds_mat(file,dset);
 end
 n=size(diag,1);
 
 % $$$ figure
 % $$$ spy(mata)
 %
 %    Spectral radius of GS Iteration Matrix
 %         Rg = -(D+L)^(-1) * U
 %
 % $$$ matl= tril(mata,0); % D+L
 % $$$ lambda = eigs(-triu(mata,1),matl)
 % $$$ fprintf(1, 'Spectral Radius of GS relaxation matrix = %g\n', max(abs(lambda)))
 % $$$ figure
 % $$$ plot(lambda, 'o')  
 % $$$ axis equal
 % $$$ grid on
 % $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
 % $$$ title(title_str)
 %
 %    Solutions at the finest grid
 %
 dense=h5read(file,'/dense');
 x=h5read(file,'/solutions/xg');
 y=h5read(file,'/solutions/yg');
 sol_anal=h5read(file,'/solutions/anal');
 %sol_direct=h5read(file,'/solutions/direct');
 sol_calc=h5read(file,'/solutions/calc');
 
 figure
 % $$$ surf(x,y,sol_calc'-sol_anal')
 pcolor(x,y,sol_calc'-sol_anal')
 shading interp
 colorbar
 xlabel('X'); ylabel('Y'); zlabel('Error');
 title(title_str)
 
 figure
 subplot(211)
 [yy,iy] = max(abs(sol_anal),[],2);
 [xx,ix] = max(yy);
 iy0=iy(ix);
 str=sprintf('Solution at y = %.4f', y(iy0));
 plot(x, sol_anal(:,iy0),x, sol_calc(:,iy0),'o')
 xlabel('x'); ylabel(str);
 grid on
 legend('Analytic Solution', 'MG Solution')
 title(title_str)
 subplot(212)
 [xx,ix] = max(abs(sol_anal));
 [yy,iy] = max(xx);
 ix0=ix(iy);
 str=sprintf('Solution at x = %.4f', x(ix0));
 plot(y, sol_anal(ix0,:),y, sol_calc(ix0,:),'o')
 xlabel('y'); ylabel(str);
 grid on
 title(title_str)
 %
 %   Iterations
 %
 dset='/Iterations/';
 disc_err=h5read(file, strcat(dset,'disc_errors'));
 resid=h5read(file, strcat(dset,'residues'));
 its=0:1:size(resid,1)-1;
 figure
 subplot(211)
 semilogy(its,resid,'o-')
 grid on
 xlabel('Iterations'); ylabel('Norm of residue');
 title(title_str);
 subplot(212)
 semilogy(its,disc_err,'h-')
 grid on
 xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/ppde3d.m b/matlab/ppde3d.m
index ed333f2..de73cc6 100644
--- a/matlab/ppde3d.m
+++ b/matlab/ppde3d.m
@@ -1,145 +1,145 @@
 %
 % @file ppde3d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='ppde3d.h5';
 %
 %     Get data from data sets
 %
 x=hdf5read(file,'/xgrid');
 y=hdf5read(file,'/ygrid');
 z=hdf5read(file,'/zgrid');
 %
 nx=size(x);
 ny=size(y);
 nz=size(z);
 %
 sol=hdf5read(file,'/sol');
 solexact=hdf5read(file,'/solana');
 solx=hdf5read(file,'/derivx');
 soly=hdf5read(file,'/derivy');
 solz=hdf5read(file,'/derivz');
 solx_exact=hdf5read(file,'/derivx_exact');
 soly_exact=hdf5read(file,'/derivy_exact');
 solz_exact=hdf5read(file,'/derivz_exact');
 
 figure
 k=ceil(nz(1)/2);
 subplot(311);
 pcolor(x,y,sol(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated solution')
 colorbar
 
 subplot(312);
 pcolor(x,y,solexact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical solution')
 colorbar
 
 subplot(313);
 err=sol-solexact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 subplot(311);
 pcolor(x,y,solx(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dx')
 colorbar
 
 subplot(312);
 pcolor(x,y,solx_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dx')
 colorbar
 
 subplot(313);
 err=solx-solx_exact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 k=ceil(nz(1)/2);
 subplot(311);
 pcolor(x,y,soly(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dy')
 colorbar
 
 subplot(312);
 pcolor(x,y,soly_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dy')
 colorbar
 
 subplot(313);
 err=soly-soly_exact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 k=ceil(nz(1)/6);
 subplot(311);
 pcolor(x,y,solz(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dz')
 colorbar
 
 subplot(312);
 pcolor(x,y,solz_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dz')
 colorbar
 
 subplot(313);
 err=solz-solz_exact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 i=ceil(nx(1)/2);
 plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
 xlabel('z');
diff --git a/matlab/ppde3d_pb.m b/matlab/ppde3d_pb.m
index 0702db8..8541704 100644
--- a/matlab/ppde3d_pb.m
+++ b/matlab/ppde3d_pb.m
@@ -1,145 +1,145 @@
 %
 % @file ppde3d_pb.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='ppde3d_pb.h5';
 %
 %     Get data from data sets
 %
 x=hdf5read(file,'/xgrid');
 y=hdf5read(file,'/ygrid');
 z=hdf5read(file,'/zgrid');
 %
 nx=size(x);
 ny=size(y);
 nz=size(z);
 %
 sol=hdf5read(file,'/sol');
 solexact=hdf5read(file,'/solana');
 solx=hdf5read(file,'/derivx');
 soly=hdf5read(file,'/derivy');
 solz=hdf5read(file,'/derivz');
 solx_exact=hdf5read(file,'/derivx_exact');
 soly_exact=hdf5read(file,'/derivy_exact');
 solz_exact=hdf5read(file,'/derivz_exact');
 
 figure
 k=ceil(nz(1)/2);
 subplot(311);
 pcolor(x,y,sol(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated solution')
 colorbar
 
 subplot(312);
 pcolor(x,y,solexact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical solution')
 colorbar
 
 subplot(313);
 err=sol-solexact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 subplot(311);
 pcolor(x,y,solx(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dx')
 colorbar
 
 subplot(312);
 pcolor(x,y,solx_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dx')
 colorbar
 
 subplot(313);
 err=solx-solx_exact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 k=ceil(nz(1)/2);
 subplot(311);
 pcolor(x,y,soly(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dy')
 colorbar
 
 subplot(312);
 pcolor(x,y,soly_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dy')
 colorbar
 
 subplot(313);
 err=soly-soly_exact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 k=ceil(nz(1)/6);
 subplot(311);
 pcolor(x,y,solz(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Caculated d/dz')
 colorbar
 
 subplot(312);
 pcolor(x,y,solz_exact(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Analytical d/dz')
 colorbar
 
 subplot(313);
 err=solz-solz_exact;
 pcolor(x,y,err(:,:,k)');
 shading interp
 xlabel('x'); ylabel('y')
 title('Discretization error')
 colorbar
 
 figure
 i=ceil(nx(1)/2);
 plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
 xlabel('z');
diff --git a/matlab/ppoisson_fd.m b/matlab/ppoisson_fd.m
index a573749..e098384 100644
--- a/matlab/ppoisson_fd.m
+++ b/matlab/ppoisson_fd.m
@@ -1,110 +1,110 @@
 %
 % @file ppoisson_fd.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='ppoisson_fd.h5'; end
 
 prb=h5readatt(file,'/','PRB');
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 lx=h5readatt(file,'/','LX');
 ly=h5readatt(file,'/','LY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 beta=h5readatt(file,'/','BETA');
 omega=h5readatt(file,'/','OMEGA');
 relax=h5readatt(file,'/','RELAX');
 nlevels=h5readatt(file,'/','LEVELS');
 mu=h5readatt(file,'/','MU');
 nu1=h5readatt(file,'/','NU1');
 nu2=h5readatt(file,'/','NU2');
 direct_solve_nits=h5readatt(file,'/','DIRECT_SOLVE_NITS');
 
 title_str=sprintf(['PRB=%s, N=(%d,%d), relax=%s, V(%d,%d), LEVELS=%d, DIRECT SOLVE=%d'], ...
                   prb, nx, ny, relax, nu1, nu2, nlevels, direct_solve_nits);
 
 x = h5read(file, '/xgrid');
 y = h5read(file, '/ygrid');
 [X,Y]=meshgrid(y,x);
 n1=size(x,1);
 n2=size(y,1);
 n=n1*n2
 
 mat = stencil_mat(file, '/MAT');
 % $$$ figure
 % $$$ spy(mat)
 
 f = h5read(file,'/f'); f1d=reshape(f,n,1);
 v = h5read(file,'/v'); v1d=reshape(v,n,1);
 u = h5read(file,'/u'); u1d=reshape(u,n,1);
 
 % $$$ udirect1d = mat\f1d;
 % $$$ udirect=reshape(udirect1d,n1,n2);
 % $$$ fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
 % $$$ fprintf('Error of direct solution    = %.3e\n', norm(udirect1d- ...
 % $$$                                                   v1d));
 figure
 subplot(221)
 pcolor(x,y,v'-u')
 xlabel('X'); ylabel('Y');
 shading interp
 colorbar
 title('Error on the finest grid')
 
 subplot(222)
 [yy,iy] = max(abs(v),[],2);
 [xx,ix] = max(yy);
 iy0=iy(ix);
 str=sprintf('Solution at y = %.4f', y(iy0));
 plot(x, v(:,iy0),x, u(:,iy0),'o')
 xlabel('x'); ylabel(str);
 grid on
 legend('Analytic Solution', 'Computed Solution')
 title(title_str)
 
 subplot(223)
 [xx,ix] = max(abs(v));
 [yy,iy] = max(xx);
 ix0=ix(iy);
 str=sprintf('Solution at x = %.4f', x(ix0));
 plot(y, v(ix0,:),y, u(ix0,:),'o')
 xlabel('y'); ylabel(str);
 grid on
 
 figure
 resid_it=h5read(file, '/resid');
 err_it=h5read(file, '/error');
 nits = size(resid_it,1)-1;
 it=0:nits;
 subplot(211)
 semilogy(it,resid_it,'o-')
 xlabel('Iterations')
 ylabel('Residual norm')
 grid on
 title(title_str)
 subplot(212)
 semilogy(it, err_it, 'o-')
 xlabel('Iterations')
 ylabel('Norm of Discretization Error')
 grid on
diff --git a/matlab/relax.m b/matlab/relax.m
index 37d3fca..bfbaa29 100644
--- a/matlab/relax.m
+++ b/matlab/relax.m
@@ -1,55 +1,55 @@
 %
 % @file relax.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 load relax.mat
 
 figure
 subplot(221)
 semilogy(jac_1(:,1),jac_1(:,2),gs_1(:,1),gs_1(:,2))
 grid on
 legend('Jacobi', 'GS')
 xlabel('Iterations'); ylabel('Error')
 title('NX=32, P=1')
 
 subplot(222)
 semilogy(jac_3(:,1),jac_3(:,2),gs_3(:,1),gs_3(:,2))
 grid on
 legend('Jacobi', 'GS')
 xlabel('Iterations'); ylabel('Error')
 title('NX=32, P=3')
 
 subplot(223)
 semilogy(jac_1(:,1),jac_1(:,4),gs_1(:,1),gs_1(:,4))
 grid on
 legend('Jacobi', 'GS')
 xlabel('Iterations'); ylabel('Discretization error')
 title('NX=32, P=1')
 
 subplot(224)
 semilogy(jac_3(:,1),jac_3(:,4),gs_3(:,1),gs_3(:,4))
 grid on
 legend('Jacobi', 'GS')
 xlabel('Iterations'); ylabel('Discretization error')
 title('NX=32, P=3')
diff --git a/matlab/stencil_mat.m b/matlab/stencil_mat.m
index 4283768..6c7030c 100644
--- a/matlab/stencil_mat.m
+++ b/matlab/stencil_mat.m
@@ -1,49 +1,49 @@
 %
 % @file stencil_mat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [mata, diag] = stencil_mat(file, dset)
     id = double(h5read(file, strcat(dset,'/dists')));
     val = h5read(file, strcat(dset,'/val'));
     n1 = size(val,1);
     n2 = size(val,2);
     n = n1*n2;
     ndiag = size(val,3);
     dists = id(:,1) + n1*id(:,2);
     val = reshape(val,n,ndiag);
     
     %%    Shift the off-diagonals  %%
     for k=1:length(dists)
         d=dists(k);
         if d < 0
             val(1:n+d,k) = val(1-d:n,k);
         elseif d > 0
             val(n:-1:d+1,k) = val(n-d:-1:1,k);
         end
     end
     mata = spdiags(val, dists, n,n);
     if nargout == 2
         idiag = find(dists==0);
         diag = val(:,idiag);
     end
diff --git a/matlab/tcdsmat.m b/matlab/tcdsmat.m
index 5331df7..e5a974a 100644
--- a/matlab/tcdsmat.m
+++ b/matlab/tcdsmat.m
@@ -1,91 +1,91 @@
 %
 % @file tcdsmat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 mat='/MAT1';
 gbmat;
 clear S gb_mat;
 
 file='tcdsmat.h5';
 nx=hdf5read(file,'/NX');
 ny=hdf5read(file,'/NY');
 dists=hdf5read(file,strcat(mat,'/dists'));
 vals=hdf5read(file,strcat(mat,'/vals'));
 rowv=hdf5read(file,strcat(mat,'/rowv'));
 colh=hdf5read(file,strcat(mat,'/colh'));
 n=hdf5read(file,strcat(mat,'/RANK'));
 nd=hdf5read(file,strcat(mat,'/NDIAGS'));
 
 err=zeros(n,nd);
 
 % Diagonal balancing of matrix
 dbal = 1./sqrt(diag(a));
 a = diag(dbal)*a*diag(dbal);
 
 % Check CDS mat except row ny and column ny
 for k=1:nd
     d=dists(k);
     i1=max(1,1-d); i2=min(n,n-d);
     fprintf(1,'%8d %8d %8d\n',d,i1,i2);
     for i=i1:i2
         j=i+d;
         if (i~=ny && j~=ny)
             err(i,k) = a(i,j)-vals(i,k);
         end
     end
 end
 fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
 
 % Check row ny and j .ne. ny
 i=ny;
 bw0=size(rowv,1);
 for k=1:nd
     d=dists(k);
     j=i+d;
     if ((j >= ny+1) && (j <= bw0))
         err(i,k)=a(i,j)-rowv(j);
     end
 end
 fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
 
 
 % Check column ny
 j=ny;
 for k=1:nd
     d=dists(k);
     i=j-d;
     if ((i >= ny+1) && (i <= bw0))
         err(i,k)=a(i,j)-colh(i);
     end
 end
 fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
 
 % Check RHS
 rhs=hdf5read(file,'/RHS');
 fprintf('Err in RHS: %8.3e\n', max(max(abs(rhs-rhs0))))
 
 % Check SOL
 sol=hdf5read(file,'/SOL');
 err= sol-sol0;
 fprintf('Err SOL:  %8.3e\n', max(max(abs(err))));
diff --git a/matlab/tcdsmat_plot_sol.m b/matlab/tcdsmat_plot_sol.m
index 0754929..68234c5 100644
--- a/matlab/tcdsmat_plot_sol.m
+++ b/matlab/tcdsmat_plot_sol.m
@@ -1,79 +1,79 @@
 %
 % @file tcdsmat_plot_sol.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='tcdsmat.h5'
 %
 %     Get data from data sets
 %
 r=hdf5read(file,'/xgrid');
 t=hdf5read(file,'/ygrid');
 sol=hdf5read(file,'/sol')';
 solexact=hdf5read(file,'/solana')';
 err=hdf5read(file,'/errors')';
 %
 %     Attributes
 %
 NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
 NIDBAS1=hdf5read(file,'/NIDBAS1');
 NIDBAS2=hdf5read(file,'/NIDBAS2');
 MBESS=hdf5read(file,'/MBESS');
 LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
               NIDBAS1, NIDBAS2, MBESS);
 
 attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
 attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
 attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
 attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
 attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
 
 [R,T]=meshgrid(r,t);
 x = R.*cos(T); y= R.*sin(T);
 
 figure
 subplot(221)
 pcolor(double(r),double(t),double(sol));
 shading interp
 xlabel(x_ann); ylabel(y_ann)
 title(LABEL)
 colorbar
 
 subplot(222)
 pcolor(double(x),double(y),double(sol))
 shading interp
 hold off, axis image
 xlabel('X'); ylabel('Y')
 title('X-Y plane')
 colorbar
 
 subplot(223)
 surfc(double(x),double(y),double(sol))
 xlabel('X'); ylabel('Y');
 title(sol_ann)
 
 subplot(224)
 surfc(double(x),double(y),double(err))
 xlabel('X'); ylabel('Y');
 title(err_ann)
 
diff --git a/matlab/test_intergrid.m b/matlab/test_intergrid.m
index 4d89561..5b22016 100644
--- a/matlab/test_intergrid.m
+++ b/matlab/test_intergrid.m
@@ -1,102 +1,102 @@
 %
 % @file test_intergrid.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='test_intergrid0.h5'; end
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 lx=h5readatt(file,'/','LX');
 ly=h5readatt(file,'/','LY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 nlevels=h5readatt(file,'/','LEVELS');
 title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, LEVELS=%d, KX=%d, KY=%d'], ...
                   nx,ny,lx,ly,nlevels,kx,ky);
 
 if nlevels ~= 2
     disp 'levels should be 2!'
     return
 end
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 for l=1:2
     mglevel=sprintf('/mglevels/level.%.2d', l);
     x=h5read(file,strcat(mglevel,'/x'));
     y=h5read(file,strcat(mglevel,'/y'));
     f=h5read(file,strcat(mglevel,'/f'));
     v=h5read(file,strcat(mglevel,'/v'));
     figure
     subplot(221)
     [yy,iy] = max(abs(f),[],2); [xx,ix] = max(yy); iy0=iy(ix);
     str=sprintf('f at y = %.4f', y(iy0));
     plot(x, f(:,iy0),'o-')
     xlabel('x'); ylabel(str);
     grid on
     title(title_str)
     subplot(222)
     [xx,ix] = max(abs(f)); [yy,iy] = max(xx); ix0=ix(iy);
     str=sprintf('f at x = %.4f', x(ix0));
     plot(y, f(ix0,:),'o-')
     xlabel('y'); ylabel(str);
     grid on
     title(title_str)
     subplot(223)
     [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix);
     str=sprintf('v at y = %.4f', y(iy0));
     plot(x, v(:,iy0),'ro-')
     xlabel('x'); ylabel(str);
     grid on
     title(title_str)
     subplot(224)
     [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy);
     str=sprintf('v at x = %.4f', x(ix0));
     plot(y, v(ix0,:),'ro-')
     xlabel('y'); ylabel(str);
     grid on
     title(title_str)
     if l==1
         ffine=f; vfine=v;
     else
         fcoarse=f; vcoarse=v;
     end
 end
 
 %% Check
 err_restriction = matpx'*ffine*matpy./4 - fcoarse;
 err_prolong = matpx*vcoarse*matpy' - vfine;
 
 
 
 max(max(abs(err_restriction)))
 max(max(abs(err_prolong)))
diff --git a/matlab/test_jacobi.m b/matlab/test_jacobi.m
index 2a71117..875f559 100644
--- a/matlab/test_jacobi.m
+++ b/matlab/test_jacobi.m
@@ -1,96 +1,96 @@
 %
 % @file test_jacobi.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='test_jacobi.h5'; end
 
 x = h5read(file, '/xgrid');
 y = h5read(file, '/ygrid');
 [X,Y]=meshgrid(y,x);
 n1=size(x,1);
 n2=size(y,1);
 n=n1*n2
 
 mat = stencil_mat(file, '/MAT');
 % $$$ figure
 % $$$ spy(mat)
 
 f = h5read(file,'/f'); f1d=reshape(f,n,1);
 v = h5read(file,'/v'); v1d=reshape(v,n,1);
 u = h5read(file,'/u'); u1d=reshape(u,n,1);
 resids = h5read(file,'/resids');
 errs = h5read(file,'/errs');
 
 udirect1d = mat\f1d;
 udirect=reshape(udirect1d,n1,n2);
 fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
 fprintf('Error of direct solution    = %.3e\n', norm(udirect1d-v1d));
 
 figure
 subplot(211)
 [yy,iy] = max(abs(v),[],2);
 [xx,ix] = max(yy);
 iy0=iy(ix);
 str=sprintf('Solution at y = %.4f', y(iy0));
 plot(x, v(:,iy0),x, u(:,iy0),'o')
 xlabel('x'); ylabel(str);
 grid on
 legend('Analytic Solution', 'Computed Solution')
 subplot(212)
 [xx,ix] = max(abs(v));
 [yy,iy] = max(xx);
 ix0=ix(iy);
 str=sprintf('Solution at x = %.4f', x(ix0));
 plot(y, v(ix0,:),y, u(ix0,:),'o')
 xlabel('y'); ylabel(str);
 grid on
 
 % $$$ figure
 % $$$ subplot(321)
 % $$$ surf(x,y,v'); title('Exact solution')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(322)
 % $$$ surf(x,y,f'); title('RHS')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(323)
 % $$$ surf(x,y,u'); title('Num. solution')
 % $$$ subplot(324)
 % $$$ surf(x,y,udirect'); title('Direct. solution')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(325)
 % $$$ surf(x,y,resids'); title('Residuals')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(326)
 % $$$ surf(x,y,errs'); title('Errors')
 % $$$ xlabel('X'); ylabel('Y')
 
 resid_it=h5read(file, '/resid');
 err_it=h5read(file, '/error');
 nits = size(resid_it,1)-1;
 it=0:nits;
 figure
 semilogy(it,resid_it, it, err_it)
 legend('Residual norm', 'Discretization error')
 xlabel('Iterations')
 grid on
diff --git a/matlab/test_jacobig.m b/matlab/test_jacobig.m
index a0e31d2..39dee82 100644
--- a/matlab/test_jacobig.m
+++ b/matlab/test_jacobig.m
@@ -1,84 +1,84 @@
 %
 % @file test_jacobig.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='test_jacobig.h5'; end
 
 x = h5read(file, '/xgrid');
 y = h5read(file, '/ygrid');
 [X,Y]=meshgrid(y,x);
 n1=size(x,1);
 n2=size(y,1);
 n=n1*n2
 
 mat = stencil_mat(file, '/MAT');
 % $$$ figure
 % $$$ spy(mat)
 
 f = h5read(file,'/f'); f1d=reshape(f,n,1);
 v = h5read(file,'/v'); v1d=reshape(v,n,1);
 u = h5read(file,'/u'); u1d=reshape(u,n,1);
 
 udirect1d = mat\f1d;
 udirect=reshape(udirect1d,n1,n2);
 fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
 fprintf('Error of direct solution    = %.3e\n', norm(udirect1d- ...
                                                   v1d));
 figure
 subplot(221)
 pcolor(x,y,v'-u')
 xlabel('X'); ylabel('Y');
 shading interp
 colorbar
 title('Error')
 
 subplot(223)
 [yy,iy] = max(abs(v),[],2);
 [xx,ix] = max(yy);
 iy0=iy(ix);
 str=sprintf('Solution at y = %.4f', y(iy0));
 plot(x, v(:,iy0),x, u(:,iy0),'o')
 xlabel('x'); ylabel(str);
 grid on
 legend('Analytic Solution', 'Computed Solution')
 
 subplot(224)
 [xx,ix] = max(abs(v));
 [yy,iy] = max(xx);
 ix0=ix(iy);
 str=sprintf('Solution at x = %.4f', x(ix0));
 plot(y, v(ix0,:),y, u(ix0,:),'o')
 xlabel('y'); ylabel(str);
 grid on
 
 resid_it=h5read(file, '/resid');
 err_it=h5read(file, '/error');
 nits = size(resid_it,1)-1;
 it=0:nits;
 
 subplot(222)
 semilogy(it,resid_it, it, err_it)
 legend('Residual norm', 'Discretization error')
 xlabel('Iterations')
 grid on
diff --git a/matlab/test_mg.m b/matlab/test_mg.m
index d0ed10b..5f0b5fd 100644
--- a/matlab/test_mg.m
+++ b/matlab/test_mg.m
@@ -1,164 +1,164 @@
 %
 % @file test_mg.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_mg.h5';
 %
 nx=h5readatt(file,'/','NX');
 nidbas=h5readatt(file,'/','NIDBAS');
 relax=h5readatt(file,'/','RELAX');
 levels=h5readatt(file,'/','LEVELS');
 nu1=h5readatt(file,'/','NU1');
 nu2=h5readatt(file,'/','NU2');
 alpha=h5readatt(file,'/','ALPHA');
 omega=h5readatt(file,'/','OMEGA');
 
 if alpha == 0
     kx=h5readatt(file,'/','KMODE');
     title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ...
                   nx,nidbas,kx,relax, omega, levels, nu1, nu2);
 else
     modem=h5readatt(file,'/','MODEM');
     modep=h5readatt(file,'/','MODEP');
     title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ...
                   nx,nidbas,modem,modep,relax, omega, levels, nu1, nu2);
 end
 
 
 %
 % Read matrices at coarset grid
 %
 for lev=2:levels
     %
     %  FE mat at fine grid
     mglevel=sprintf('/mglevels/level.%.2d', lev-1);
     dset=strcat(mglevel,'/mata');
     mata_f = gb_mat(file, dset);
 
     %
     %  FE mat at coarse grid
     mglevel=sprintf('/mglevels/level.%.2d', lev);
     dset=strcat(mglevel,'/mata');
     mata_c = gb_mat(file, dset);
 
     %
     %  Prolong mat
     dset=strcat(mglevel,'/matp');
     matp=h5read(file,dset);
     %
     %   Check
     fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
                                                       - mata_c))
 end
 %
 %   Iterations
 dset='/Iterations/';
 err=h5read(file, strcat(dset,'errors'));
 disc_err=h5read(file, strcat(dset,'disc_errors'));
 resid=h5read(file, strcat(dset,'residues'));
 its=0:1:size(err,1)-1;
 
 figure
 subplot(212)
 semilogy(its,resid,'o-', its, disc_err,'h-')
 legend('Residue', 'Error')
 grid on
 xlabel('Iterations'); ylabel('Norm od residue and error');
 title(title_str);
 
 %
 %   Plot grid values at the last iteration
 xgrid=h5read(file, '/Iterations/xgrid');
 u_calc=h5read(file, '/Iterations/u_calc');
 u_exact=h5read(file, '/Iterations/u_exact');
 u_direct=h5read(file, '/Iterations/u_direct');
 subplot(211)
 plot(xgrid, u_exact, xgrid,u_calc,'o')
 legend('Analytic', 'Calculated')
 xlabel('X');ylabel('Grid values of solution')
 grid on
 title(title_str);
 
 %
 % 
 % $$$ mglevel=sprintf('/mglevels/level.%.2d', 1);
 % $$$ dset=strcat(mglevel,'/mata');
 % $$$ A = gb_mat(file, dset);
 % $$$ D = diag(diag(A),0);
 % $$$ n=rank(A);
 % $$$ k=1:1:n;
 % $$$ if relax(1:2) == 'ja'
 % $$$ %
 % $$$ %  Compute eigenvalues of Rj = D^(-1)*A
 % $$$ %
 % $$$     [V, l] = eig(A,D);
 % $$$     [lambda, iss] = sort(diag(l));
 % $$$     V = V(1:end,iss);
 % $$$ %
 % $$$ %   Spectral radius of Jacobi iteration matrix
 % $$$ %         R(omega) = max |1-omega*lambda|
 % $$$ %
 % $$$     om=0:0.01:1;
 % $$$     for i=1:size(om,2)
 % $$$         rho(i) = max(abs(1-om(i).*lambda));
 % $$$     end
 % $$$     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda)))
 % $$$ 
 % $$$     figure
 % $$$     subplot(211)
 % $$$     plot(k, 1-omega*lambda, 'o-')
 % $$$     xlabel('mode k'); ylabel('Eigen value of inv(D)*A')
 % $$$     grid on
 % $$$     title(title_str)
 % $$$     subplot(212)
 % $$$     plot(om, rho)
 % $$$     xlabel('\omega'); ylabel('Spectral Radius')
 % $$$     grid on
 % $$$     
 % $$$     for i=1:n
 % $$$         k=mod(i-1,4*5)+1;
 % $$$         if k==1
 % $$$             figure
 % $$$             title(title_str)
 % $$$         end
 % $$$         subplot(4,5,k)
 % $$$         str = sprintf('Mode = %d, ||R|| = %.3f', i, 1-omega*lambda(i));
 % $$$         plot(V(:,i)); grid on
 % $$$         title(str)
 % $$$     end
 % $$$ elseif relax(1:2) == 'gs'
 % $$$ %
 % $$$ %    Spectral radius of GS Iteration Matrix
 % $$$ %         Rg = (D-L)^(-1) * U
 % $$$ %
 % $$$     B = tril(A,0); % D-L
 % $$$     lambda = eig(-triu(A,1),B);
 % $$$     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
 % $$$     figure
 % $$$     plot(lambda, 'o', 'MarkerSize', 6)  
 % $$$     axis equal
 % $$$     grid on
 % $$$     xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
 % $$$     title(title_str)
 % $$$ end
diff --git a/matlab/test_mg2d.m b/matlab/test_mg2d.m
index cac6b9d..279ad29 100644
--- a/matlab/test_mg2d.m
+++ b/matlab/test_mg2d.m
@@ -1,102 +1,102 @@
 %
 % @file test_mg2d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_mg2d.h5';
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 relax=h5readatt(file,'/','RELAX');
 nlevels=h5readatt(file,'/','LEVELS');
 nu1=h5readatt(file,'/','NU1');
 nu2=h5readatt(file,'/','NU2');
 mu=h5readatt(file,'/','MU');
 title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
                    'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ...
                   nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky);
 
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 % $$$ figure
 % $$$ spy(mata)
 
 %
 %    Solutions at the finest grid
 %
 x=h5read(file,'/solutions/xg');
 y=h5read(file,'/solutions/yg');
 sol_anal=h5read(file,'/solutions/anal');
 sol_calc=h5read(file,'/solutions/calc');
 % $$$ figure
 % $$$ subplot(211)
 % $$$ surf(x,y,sol_anal')
 % $$$ xlabel('X'); ylabel('Y');
 % $$$ title('Analytical solution on the finest grid')
 % $$$ subplot(212)
 % $$$ surf(x,y,sol_calc')
 % $$$ xlabel('X'); ylabel('Y');
 % $$$ title('Calculated solution on the finest grid')
 
 %
 %   Iterations
 %
 dset='/Iterations/';
 disc_err=h5read(file, strcat(dset,'disc_errors'));
 resid=h5read(file, strcat(dset,'residues'));
 its=0:1:size(resid,1)-1;
 figure
 subplot(211)
 semilogy(its,resid,'o-')
 grid on
 xlabel('Iterations'); ylabel('Norm of residue');
 title(title_str);
 subplot(212)
 semilogy(its,disc_err,'h-')
 grid on
 xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/test_mg2d_cyl.m b/matlab/test_mg2d_cyl.m
index 3258da6..86f6ec5 100644
--- a/matlab/test_mg2d_cyl.m
+++ b/matlab/test_mg2d_cyl.m
@@ -1,141 +1,141 @@
 %
 % @file test_mg2d_cyl.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_mg2d_cyl.h5';
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 modem=h5readatt(file,'/','MODEM');
 modep=h5readatt(file,'/','MODEP');
 relax=h5readatt(file,'/','RELAX');
 nlevels=h5readatt(file,'/','LEVELS');
 nu1=h5readatt(file,'/','NU1');
 nu2=h5readatt(file,'/','NU2');
 mu=h5readatt(file,'/','MU');
 title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
                    'nu2=%d, mu=%d, LEVELS=%d, MODEM=%d, MODEP=%d'], ...
                   nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,modem,modep);
 
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 % $$$ figure
 % $$$ spy(mata)
 
 %
 %    Solutions at the finest grid
 %
 x=h5read(file,'/solutions/xg');
 y=h5read(file,'/solutions/yg');
 sol_anal=h5read(file,'/solutions/anal');
 sol_calc=h5read(file,'/solutions/calc');
 sol_direct=h5read(file,'/solutions/direct');
 nx=int32(size(x,1));
 ny=int32(size(y,1));
 
 figure
 subplot(211)
 surf(x,y,sol_anal')
 xlabel('X'); ylabel('Y');
 title('Analytical solution on the finest grid')
 subplot(212)
 surf(x,y,sol_calc')
 % $$$ surf(x,y, (abs(sol_calc'-sol_anal')))
 xlabel('X'); ylabel('Y');
 title('Calculated solution on the finest grid')
 
 figure
 subplot(211)
 plot(x, sol_anal(:,ny/2),x, sol_calc(:,ny/2),'o')
 xlabel('r');
 grid on
 legend('Analytic Solution', 'MG Solution')
 title(title_str)
 subplot(212)
 if modem == 0
     plot(y, sol_anal(1,:),y, sol_calc(1,:),'o')
 else
     plot(y, sol_anal(nx/2,:),y, sol_calc(nx/2,:),'o')
 end
 xlabel('\theta');
 grid on
 title(title_str)
 
 % $$$ figure
 % $$$ subplot(211)
 % $$$ semilogy(x, abs(sol_anal(:,ny/2)-sol_calc(:,ny/2)),'o')
 % $$$ xlabel('r'); ylabel('Error')
 % $$$ grid on
 % $$$ title(title_str)
 % $$$ subplot(212)
 % $$$ if modem == 0
 % $$$     semilogy(y, abs(sol_anal(1,:)-sol_calc(1,:)),'o')
 % $$$ else
 % $$$     semilogy(y, abs(sol_anal(nx/2,:)-sol_calc(nx/2,:)),'o')
 % $$$ end
 % $$$ xlabel('\theta'); ylabel('Error');
 % $$$ grid on
 % $$$ title(title_str)
 % $$$ 
 
 %
 %   Iterations
 %
 dset='/Iterations/';
 disc_err=h5read(file, strcat(dset,'disc_errors'));
 resid=h5read(file, strcat(dset,'residues'));
 its=0:1:size(resid,1)-1;
 figure
 subplot(211)
 semilogy(its,resid,'o-')
 grid on
 xlabel('Iterations'); ylabel('Norm of residue');
 title(title_str);
 subplot(212)
 semilogy(its,disc_err,'h-')
 grid on
 xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/test_mgp.m b/matlab/test_mgp.m
index 4f5fb1b..69d5214 100644
--- a/matlab/test_mgp.m
+++ b/matlab/test_mgp.m
@@ -1,98 +1,98 @@
 %
 % @file test_mgp.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_mgp.h5';
 %
 nx=h5readatt(file,'/','NX');
 nidbas=h5readatt(file,'/','NIDBAS');
 relax=h5readatt(file,'/','RELAX');
 levels=h5readatt(file,'/','LEVELS');
 nu1=h5readatt(file,'/','NU1');
 nu2=h5readatt(file,'/','NU2');
 title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d, nu1 = %d, nu2 = %d', nx, nidbas, levels, nu1, nu2);
 %
 % Read matrices at coarset grid
 %
 for lev=2:levels
 %
 %  FE mat at fine grid
 mglevel=sprintf('/mglevels/level.%.2d', lev-1);
 dset=strcat(mglevel,'/mata');
 mata_f=h5read(file,dset);
 n=size(mata_f,1);
 %
 %  FE mat at coarse grid
 mglevel=sprintf('/mglevels/level.%.2d', lev);
 dset=strcat(mglevel,'/mata');
 mata_c=h5read(file,dset);
 n=size(mata_c,1);
 %
 %  Prolong mat
 dset=strcat(mglevel,'/matp');
 matp=h5read(file,dset);
 %
 %   Check
 fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
                                                   - mata_c))
 end
 %
 %   Iterations
 dset='/Iterations/';
 err=h5read(file, strcat(dset,'errors'));
 disc_err=h5read(file, strcat(dset,'disc_errors'));
 resid=h5read(file, strcat(dset,'residues'));
 its=0:1:size(err,1)-1;
 
 figure
 subplot(221)
 semilogy(its,resid,'o-', its, disc_err,'h-')
 legend('Residue', 'Error')
 grid on
 xlabel('Iterations'); ylabel('Norm od residue and error');
 title(title_str);
 
 %
 %   Plot grid values at the last iteration
 xgrid=h5read(file, '/Iterations/xgrid');
 u_calc=h5read(file, '/Iterations/u_calc');
 u_exact=h5read(file, '/Iterations/u_exact');
 u_direct=h5read(file, '/Iterations/u_direct');
 subplot(222)
 plot(xgrid,u_exact, xgrid,u_calc,'o')
 xlabel('X');ylabel('Grid values of solution')
 grid on
 title(title_str);
 
 subplot(223)
 semilogy(xgrid,abs(u_calc-u_direct))
 xlabel('X');ylabel('Diff with direct solution')
 grid on
 title(title_str);
 
 subplot(224)
 semilogy(xgrid,abs(u_calc-u_exact))
 xlabel('X');ylabel('Diff with exact solution')
 grid on
 title(title_str);
diff --git a/matlab/test_relax.m b/matlab/test_relax.m
index 938b605..8d8fbbb 100644
--- a/matlab/test_relax.m
+++ b/matlab/test_relax.m
@@ -1,157 +1,157 @@
 %
 % @file test_relax.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_relax.h5';
 %
 nx=h5readatt(file,'/','NX');
 alpha=h5readatt(file,'/','ALPHA');
 nidbas=h5readatt(file,'/','NIDBAS');
 relax=h5readatt(file,'/','RELAX');
 omega=h5readatt(file,'/','OMEGA');
 if alpha == 0
     kx=h5readatt(file,'/','KX');
     title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f', ...
                   nx,nidbas,kx,relax, omega);
 else
     modem=h5readatt(file,'/','MODEM');
     modep=h5readatt(file,'/','MODEP');
     title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f', ...
                   nx,nidbas,modem,modep,relax, omega);
 end
 
 
 %
 %    Solutions at the finest grid
 %
 x=h5read(file,'/solutions/xg');
 sol_direct=h5read(file,'/solutions/direct');
 sol_anal=h5read(file,'/solutions/anal');
 sol_calc=h5read(file,'/solutions/calc');
 figure
 subplot(211)
 plot(x, sol_anal, x, sol_calc, 'o')
 legend('Analytic', 'Calculated')
 xlabel('X')
 grid on
 title(title_str);
 
 %
 %    Relaxations
 %
 errdisc=h5read(file,'/relaxation/errdisc');
 resid=h5read(file,'/relaxation/resid');
 its=0:1:size(errdisc)-1;
 subplot(212)
 semilogy(its,errdisc,its,resid)
 legend('Discretisation Error', 'Residual Norm')
 xlabel('Iterations')
 grid on
 title(title_str)
 
 %
 %  FE Matrix
 %
 dset = '/MATA/';
 A = gb_mat(file, dset);
 D = diag(diag(A),0);
 n=rank(A);
 k=1:1:n;
 if relax(1:2) == 'ja'
 %
 %  Compute eigenvalues of Rj = D^(-1)*A
 %
     [V, l] = eig(A,D);
     [lambda, iss] = sort(diag(l));
     V = V(1:end,iss);
 %
 %   Spectral radius of Jacobi iteration matrix
 %         R(omega) = max |1-omega*lambda|
 %
     om=0:0.01:1;
     for i=1:size(om,2)
         rho(i) = max(abs(1-om(i).*lambda));
     end
     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda)))
 
     figure
     subplot(211)
     plot(k, 1-omega*lambda, 'o-')
     xlabel('mode k'); ylabel('Eigen value of inv(D)*A')
     grid on
     title(title_str)
     subplot(212)
     plot(om, rho)
     omega_c = 2.0/max(lambda);
     str = sprintf('Critical omega = %.3f', omega_c)
     title(str)
     xlabel('\omega'); ylabel('Spectral Radius')
     grid on
     
 elseif relax(1:2) == 'gs'
 %
 %    Spectral radius of GS Iteration Matrix
 %         Rg = -(D+L)^(-1) * U
 %
     B = tril(A,0); % D+L
 % $$$     [V, l] = eig(-triu(A,1),B); lambda=diag(l);
 % $$$     [V, l] = eig(B,A); lambda = 1 - 1./diag(l);
     [V, l] = eig(A,B); lambda = 1 - diag(l);
     [lambda, iss] = sort(lambda, 'descend');
     V = V(1:end,iss);
     
     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
     figure
     subplot(211)
     plot(real(lambda), imag(lambda), 'o')  
     xlabel('Real of eigenvalues'); ylabel(['Imag of ' ...
                             'eigenvalues'])
     axis equal
     title(title_str)
     grid on
     subplot(212)
     plot(k, abs(lambda), 'o-')  
     xlabel('Mode'); ylabel('eigenvalues')
     grid on
 end
 %    
 %   Plot eigenvectors
 neig=size(lambda,1);
 for i=1:neig
     k=mod(i-1,4*5)+1;
     if k==1
         figure
         title(title_str)
     end
     subplot(4,5,k)
     if relax(1:2) == 'ja'
         str = sprintf('Mode = %d, ||R|| = %.4f', i, 1-omega* ...
                       lambda(i));
     else
         str = sprintf('Mode = %d, ||R|| = %.4f', i, ...
                       lambda(i));
     end
     plot(V(:,i)); grid on
     title(str)
 end
diff --git a/matlab/test_relax2d.m b/matlab/test_relax2d.m
index 3e5f778..2370781 100644
--- a/matlab/test_relax2d.m
+++ b/matlab/test_relax2d.m
@@ -1,86 +1,86 @@
 %
 % @file test_relax2d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_relax2d.h5';
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 levels=h5readatt(file,'/','LEVELS');
 relax=h5readatt(file,'/','RELAX');
 title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d, relax=%s', ...
                   nx,ny,nidbas1,nidbas2,kx,ky,relax);
 
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 
 x=h5read(file,strcat(mglevel,'/x'));
 y=h5read(file,strcat(mglevel,'/y'));
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 figure
 spy(mata)
 
 %
 %    Solutions at te finest grid
 %
 sol_direct=h5read(file,'/solutions/direct');
 sol_anal=h5read(file,'/solutions/anal');
 figure
 surf(x,y,sol_direct')
 xlabel('X'); ylabel('Y');
 
 %
 %    Relaxations
 %
 errdisc=h5read(file,'/relaxation/errdisc');
 resid=h5read(file,'/relaxation/resid');
 its=0:1:size(errdisc)-1;
 figure
 semilogy(its,errdisc,its,resid)
 legend('Discretisation Error', 'Residual Norm')
 xlabel('Iterations')
 grid on
 title(title_str)
diff --git a/matlab/test_relax2d_cyl.m b/matlab/test_relax2d_cyl.m
index e01c146..78f4c4c 100644
--- a/matlab/test_relax2d_cyl.m
+++ b/matlab/test_relax2d_cyl.m
@@ -1,183 +1,183 @@
 %
 % @file test_relax2d_cyl.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_relax2d_cyl.h5';
 %
 nx=h5readatt(file,'/','NX');
 ny=h5readatt(file,'/','NY');
 modem=h5readatt(file,'/','MODEM');
 modep=h5readatt(file,'/','MODEP');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 levels=h5readatt(file,'/','LEVELS');
 omega=h5readatt(file,'/','OMEGA');
 relax=h5readatt(file,'/','RELAX');
 if relax(1:2) == 'ja'
     title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s, omega=%.3f', ...
                   nx,ny,nidbas1,nidbas2,modem,modep,relax, omega);
 else
     title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s,', ...
                   nx,ny,nidbas1,nidbas2,modem,modep,relax);
 end
 
 %
 %   Prolongation matrices at the coarsest grid
 %
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %    Solutions at the finest grid
 %
 mglevel=sprintf('/mglevels/level.%.2d', 1);
 x=h5read(file,strcat(mglevel,'/x'));
 y=h5read(file,strcat(mglevel,'/y'));
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 sol_direct=h5read(file,'/solutions/direct');
 sol_relax=h5read(file,'/solutions/relax');
 sol_anal=h5read(file,'/solutions/anal');
 
 % $$$ figure
 % $$$ surf(x,y,sol_direct')
 % $$$ xlabel('r'); ylabel('\theta');
 % $$$ title(title_str)
 
 % $$$ figure
 % $$$ subplot(211)
 % $$$ plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o')
 % $$$ xlabel('r'); ylabel('Direct solution')
 % $$$ grid on
 % $$$ title(title_str)
 % $$$ subplot(212)
 % $$$ if modem == 0
 % $$$     plot(x, sol_anal(1,:),x, sol_direct(1,:),'o')
 % $$$ else
 % $$$     plot(x, sol_anal(nx/2,:),x, sol_direct(nx/2,:),'o')
 % $$$ end
 % $$$ xlabel('\theta'); ylabel('Direct solution')
 % $$$ grid on
 % $$$ title(title_str)
 
 %
 %    Relaxations
 %
 errdisc=h5read(file,'/relaxation/errdisc');
 resid=h5read(file,'/relaxation/resid');
 its=0:1:size(errdisc)-1;
 figure
 semilogy(its,errdisc,its,resid)
 legend('Discretisation Error', 'Residual Norm')
 xlabel('Iterations')
 grid on
 title(title_str)
 
 % $$$ figure
 % $$$ subplot(211)
 % $$$ plot(x, sol_anal(:,ny/2),x, sol_relax(:,ny/2),'o')
 % $$$ xlabel('r'); ; ylabel('Relaxed solution')
 % $$$ grid on
 % $$$ title(title_str)
 % $$$ subplot(212)
 % $$$ if modem == 0
 % $$$     plot(x, sol_anal(1,:),x, sol_relax(1,:),'o')
 % $$$ else
 % $$$     plot(x, sol_anal(nx/2,:),x, sol_relax(nx/2,:),'o')
 % $$$ end
 % $$$ xlabel('\theta'); ylabel('Relaxed solution')
 % $$$ grid on
 % $$$ title(title_str)
 
 figure
 subplot(211)
 plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o')
 xlabel('r'); ; ylabel('Direct solution')
 grid on
 title(title_str)
 subplot(212)
 plot(x, sol_anal(1,:),x, sol_direct(1,:),'o')
 xlabel('\theta'); ylabel('Direct solution at the axis')
 grid on
 title(title_str)
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 n=size(diag,1);
 % $$$ figure
 % $$$ spy(mata)
 % $$$ title(title_str)
 
 if relax(1:2) == 'ja'
     %
     %   Eigenvalues of inv(D)*A
     %
     matd=spdiags(diag,0,n,n);
     lambda = eigs(mata, matd, n);
     om=0:0.01:0.8;
     clear rho;
     for i=1:size(om,2)
         rho(i) = max(abs(1-om(i).*lambda));
     end
     figure
     subplot(211)
     plot(lambda,'o')
     ylabel('Eigenvalue of inv(D)*A')
     grid on
     title(title_str)
     subplot(212)
     plot(om,rho)
     xlabel('\omega'); ylabel('\rho')
     grid on
     omega_c = 2.0/max(lambda);
     lambda_min = eigs(mata, matd, 1, 'SM');
     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', abs(1-omega*lambda_min))
     str = sprintf('Critical omega = %.3f', omega_c)
     title(str)
 else
 %
 %    Spectral radius of GS Iteration Matrix
 %         Rg = -(D+L)^(-1) * U
 %
     matl= tril(mata,0); % D+L
     lambda = eigs(-triu(mata,1),matl);
     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
     figure
     plot(lambda, 'o')  
     axis equal
     grid on
     xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
     title(title_str)
 end
 
diff --git a/matlab/test_stencil.m b/matlab/test_stencil.m
index db11cc9..174267a 100644
--- a/matlab/test_stencil.m
+++ b/matlab/test_stencil.m
@@ -1,62 +1,62 @@
 %
 % @file test_stencil.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='test_stencil.h5'; end
 
 x = h5read(file, '/xgrid');
 y = h5read(file, '/ygrid');
 b1 = h5read(file, '/barr1');
 b2 = h5read(file, '/barr2');
 b3 = h5read(file, '/barr3');
 n1=size(x,1);
 n2=size(y,1);
 n=n1*n2
 
 mat = stencil_mat(file, '/MAT');
 % $$$ figure
 % $$$ spy(mat)
 
 fprintf('||B1|| = %e\n', norm(reshape(b1,n,1)));
 fprintf('||B2|| = %e\n', norm(reshape(b2,n,1)));
 fprintf('||B3|| = %e\n', norm(reshape(b3,n,1)));
 
 % $$$ figure
 % $$$ subplot(321)
 % $$$ surf(x,y,v'); title('Exact solution')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(322)
 % $$$ surf(x,y,f'); title('RHS')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(323)
 % $$$ surf(x,y,u'); title('Num. solution')
 % $$$ subplot(324)
 % $$$ surf(x,y,udirect'); title('Direct. solution')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(325)
 % $$$ surf(x,y,resids'); title('Residuals')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(326)
 % $$$ surf(x,y,errs'); title('Errors')
 % $$$ xlabel('X'); ylabel('Y')
diff --git a/matlab/test_stencilg.m b/matlab/test_stencilg.m
index fb7dccd..fae5111 100644
--- a/matlab/test_stencilg.m
+++ b/matlab/test_stencilg.m
@@ -1,63 +1,63 @@
 %
 % @file test_stencilg.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 if ~exist('file'), file='test_stencilg.h5'; end
 
 x = h5read(file, '/xgrid');
 y = h5read(file, '/ygrid');
 a = h5read(file, '/arr');
 b1 = h5read(file, '/barr1');
 b2 = h5read(file, '/barr2');
 b3 = h5read(file, '/barr3');
 n1=size(x,1);
 n2=size(y,1);
 n=n1*n2
 
 mat = stencil_mat(file, '/MAT');
 % $$$ figure
 % $$$ spy(mat)
 
 fprintf('||B1|| = %e\n', norm(reshape(b1,n,1)));
 fprintf('||B2|| = %e\n', norm(reshape(b2,n,1)));
 fprintf('||B3|| = %e\n', norm(reshape(b3,n,1)));
 
 % $$$ figure
 % $$$ subplot(321)
 % $$$ surf(x,y,v'); title('Exact solution')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(322)
 % $$$ surf(x,y,f'); title('RHS')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(323)
 % $$$ surf(x,y,u'); title('Num. solution')
 % $$$ subplot(324)
 % $$$ surf(x,y,udirect'); title('Direct. solution')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(325)
 % $$$ surf(x,y,resids'); title('Residuals')
 % $$$ xlabel('X'); ylabel('Y')
 % $$$ subplot(326)
 % $$$ surf(x,y,errs'); title('Errors')
 % $$$ xlabel('X'); ylabel('Y')
diff --git a/matlab/test_transf2d.m b/matlab/test_transf2d.m
index 506c5ad..9936481 100644
--- a/matlab/test_transf2d.m
+++ b/matlab/test_transf2d.m
@@ -1,105 +1,105 @@
 %
 % @file test_transf2d.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_transf2d.h5';
 %
 nx(1)=h5readatt(file,'/','NX');
 ny(1)=h5readatt(file,'/','NY');
 kx=h5readatt(file,'/','KX');
 ky=h5readatt(file,'/','KY');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 nlevels=h5readatt(file,'/','LEVELS');
 title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d', ...
                   nx(1),ny(1),nidbas1,nidbas2,kx,ky);
 %
 % Grid sizes on each levels
 %
 for l=2:nlevels
     nx(l) = nx(l-1)/2;
     ny(l) = ny(l-1)/2;
 end
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 
 x=h5read(file,strcat(mglevel,'/x'));
 y=h5read(file,strcat(mglevel,'/y'));
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 figure
 spy(mata)
 
 %
 %    Solutions at the finest grid
 %
 sol_direct=h5read(file,'/solutions/direct');
 sol_anal=h5read(file,'/solutions/anal');
 vfine=h5read(file,'/solutions/vfine');
 figure
 subplot(211)
 surf(x,y,sol_direct')
 xlabel('X'); ylabel('Y');
 title('Direct Solve on the finest grid')
 subplot(212)
 surf(x,y,vfine'-sol_direct')
 xlabel('X'); ylabel('Y');
 title('Prolongation solution on the finest grid')
 %
 %   Errors
 %
 errdisc = h5read(file,'/errors/errdisc');
 resid = h5read(file,'/errors/resid');
 restrict = h5read(file,'/errors/restrict');
 prolong = h5read(file,'/errors/prolong');
 errdisc_prolong = h5read(file,'/errors/disc_err_prolong');
 figure
 subplot(211)
 loglog(nx, errdisc, 'o-', nx(1:end-1), errdisc_prolong, 'h-')
 grid on;
 legend('Direct Solution', 'Prolonged Solution')
 xlabel('N'); ylabel('Discretization Errors')
 title(title_str);
 subplot(212)
 loglog(nx(2:end), restrict, 'o-', nx(1:end-1), prolong, 'h-')
 grid on;
 legend('Restricted RHS', 'Prolonged Solution')
 xlabel('N'); ylabel('Discretization Errors')
diff --git a/matlab/test_transf2d_cyl.m b/matlab/test_transf2d_cyl.m
index 1466f02..63b4c00 100644
--- a/matlab/test_transf2d_cyl.m
+++ b/matlab/test_transf2d_cyl.m
@@ -1,117 +1,117 @@
 %
 % @file test_transf2d_cyl.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='test_transf2d_cyl.h5';
 %
 nx(1)=h5readatt(file,'/','NX');
 ny(1)=h5readatt(file,'/','NY');
 modem=h5readatt(file,'/','MODEM');
 modep=h5readatt(file,'/','MODEP');
 nidbas1=h5readatt(file,'/','NIDBAS1');
 nidbas2=h5readatt(file,'/','NIDBAS2');
 nlevels=h5readatt(file,'/','LEVELS');
 title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d', ...
                   nx(1),ny(1),nidbas1,nidbas2,modem,modep);
 %
 % Grid sizes on each levels
 %
 for l=2:nlevels
     nx(l) = nx(l-1)/2;
     ny(l) = ny(l-1)/2;
 end
 %
 %   Prolongation matrices at the coarsest grid
 %
 levels=nlevels;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/matpx');
 matpx=csr_mat(file,dset);
 dset=strcat(mglevel,'/matpy');
 matpy=csr_mat(file,dset);
 
 %
 %   FE matrix at the finest grid
 %
 levels=1;
 mglevel=sprintf('/mglevels/level.%.2d', levels);
 dset=strcat(mglevel,'/mata');
 [mata,diag]=csr_mat(file,dset);
 
 x=h5read(file,strcat(mglevel,'/x'));
 y=h5read(file,strcat(mglevel,'/y'));
 f=h5read(file,strcat(mglevel,'/f'));
 v=h5read(file,strcat(mglevel,'/v'));
 f1d=h5read(file,strcat(mglevel,'/f1d'));
 v1d=h5read(file,strcat(mglevel,'/v1d'));
 % $$$ figure
 % $$$ spy(mata)
 
 %
 %    Solutions at the finest grid
 %
 sol_direct=h5read(file,'/solutions/direct');
 sol_anal=h5read(file,'/solutions/anal');
 vfine=h5read(file,'/solutions/vfine');
 figure
 subplot(211)
 surf(x,y,sol_direct')
 xlabel('X'); ylabel('Y');
 title('Direct Solve on the finest grid')
 subplot(212)
 surf(x,y,vfine')
 xlabel('X'); ylabel('Y');
 title('Prolongation solution on the finest grid')
 
 figure
 subplot(311)
 plot(x, sol_direct(:,ny(1)/2),x, vfine(:,ny(1)/2),'o')
 xlabel('r');
 grid on
 legend('Direct Solution', 'Prolonged Solution')
 title(title_str)
 subplot(313)
 plot(y, sol_direct(1,:),y, vfine(1,:),'o')
 xlabel('\theta'); ylabel('On axis')
 grid on
 title(title_str)
 subplot(312)
 plot(y, sol_direct(nx(1)/2,:),y, vfine(nx(1)/2,:),'o')
 xlabel('\theta'); ylabel('Off axis')
 grid on
 title(title_str)
 %
 %   Errors
 %
 errdisc = h5read(file,'/errors/errdisc');
 resid = h5read(file,'/errors/resid');
 restrict = h5read(file,'/errors/restrict');
 prolong = h5read(file,'/errors/prolong');
 errdisc_prolong = h5read(file,'/errors/disc_err_prolong');
 figure
 loglog(nx, errdisc, 'o-', nx(2:end), errdisc_prolong, 'h-')
 grid on;
 legend('Direct Solution', 'Prolonged Solution')
 xlabel('N'); ylabel('Discretization Errors')
 title(title_str);
diff --git a/matlab/tpardiso.m b/matlab/tpardiso.m
index 5efb0a1..7609732 100644
--- a/matlab/tpardiso.m
+++ b/matlab/tpardiso.m
@@ -1,72 +1,72 @@
 %
 % @file tpardiso.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 mat='/MAT1';
 gbmat;
 clear S gb_mat;
 
 % Diagonal balancing of matrix
 dbal = 1./sqrt(diag(a));
 a = diag(dbal)*a*diag(dbal);
 
 file='tpardiso.h5';
 mat='/MAT';
 n=hdf5read(file,strcat(mat,'/N'));
 nz=hdf5read(file,strcat(mat,'/NZ'));
 
 irow=hdf5read(file,strcat(mat,'/irow'));
 cols=hdf5read(file,strcat(mat,'/cols'));
 val=hdf5read(file,strcat(mat,'/val'));
 perm=hdf5read(file,strcat(mat,'/perm'));
 
 amat=zeros(n,n);
 
 % Check PARDISO mat
 for i=1:n
     for k=irow(i):irow(i+1)-1
         j=cols(k);
         amat(i,j) = val(k);
         amat(j,i) = val(k);
     end
 end
 err = a-amat;
 errmx = max(max(abs(err)));
 fprintf(1,'Max. error = %e\n', errmx);
 
 figure
 spy(sparse(amat(perm,perm)),'r.');
 LABEL=sprintf('n = %d, nz =%d', n, nz);
 title(LABEL)
 
 % $$$ pmat=zeros(n);
 % $$$ for i=1:n
 % $$$     pmat(i,perm(i))=1;
 % $$$ end
 % $$$ amod=pmat*amat*pmat';
 % $$$ S=sparse(amod);
 % $$$ figure
 % $$$ spy(S,'r.');
 % $$$ LABEL=sprintf('n = %d', n);
 % $$$ title(LABEL)
diff --git a/matlab/two_grid.m b/matlab/two_grid.m
index 3be72de..d6a6a0d 100644
--- a/matlab/two_grid.m
+++ b/matlab/two_grid.m
@@ -1,92 +1,92 @@
 %
 % @file two_grid.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 file='two_grid.h5';
 %
 nx=h5readatt(file,'/','NX');
 nidbas=h5readatt(file,'/','NIDBAS');
 levels=h5readatt(file,'/','LEVELS');
 title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d', nx, nidbas, levels);
 %
 % Read matrices at coarset grid
 %
 for lev=2:levels
 %
 %  FE mat at fine grid
 mglevel=sprintf('/mglevels/level.%.2d', lev-1);
 dset=strcat(mglevel,'/mata');
 ku=h5readatt(file,dset,'KU'); kl=ku;
 n=h5readatt(file,dset,'RANK');
 gbmat=h5read(file,dset);
 mata_f=zeros(n,n);
 for i=1:n
     jmin = max(1,i-kl);
     jmax = min(n,i+ku);
     for j=jmin:jmax
         ib = kl+ku+i-j+1;
         mata_f(i,j)=gbmat(ib,j);
     end
 end
 dset=strcat(mglevel,'/f');
 f_fine = h5read(file,dset);
 dset=strcat(mglevel,'/v');
 v_fine = h5read(file,dset);
 %
 %  FE mat at coarse grid
 mglevel=sprintf('/mglevels/level.%.2d', lev);
 dset=strcat(mglevel,'/mata');
 ku=h5readatt(file,dset,'KU'); kl=ku;
 n=h5readatt(file,dset,'RANK');
 gbmat=h5read(file,dset);
 mata_c=zeros(n,n);
 for i=1:n
     jmin = max(1,i-kl);
     jmax = min(n,i+ku);
     for j=jmin:jmax
         ib = kl+ku+i-j+1;
         mata_c(i,j)=gbmat(ib,j);
     end
 end
 dset=strcat(mglevel,'/f');
 f_coarse = h5read(file,dset);
 dset=strcat(mglevel,'/v');
 v_coarse = h5read(file,dset);
 %
 %  Prolong mat
 dset=strcat(mglevel,'/matp');
 matp=h5read(file,dset);
 %
 %   Check
 fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
                                                   - mata_c))
 end
 %
 
 v_prolong = h5read(file,'/v_prolong');
 
 figure
 plot(v_fine)
 hold on
 plot(v_prolong, 'r')
diff --git a/matlab/zcsr_mat.m b/matlab/zcsr_mat.m
index 0b1b491..9411e3c 100644
--- a/matlab/zcsr_mat.m
+++ b/matlab/zcsr_mat.m
@@ -1,44 +1,44 @@
 %
 % @file zcsr_mat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [mata, diag] = zcsr_mat(file, dset)
     n=hdf5read(file,dset, 'RANK');
     nnz=hdf5read(file,dset, 'NNZ');
     cols=hdf5read(file, strcat(dset,'/cols')); 
     irow=hdf5read(file, strcat(dset,'/irow')); 
     val=h5Complex_ll(file, strcat(dset,'/val')); 
     idiag=hdf5read(file, strcat(dset,'/idiag')); 
     for i=1:n
         s = irow(i);
         e = irow(i+1)-1;
         rows(s:e) = i;    
     end
     cols=double(cols);
     rows=double(rows);
     mata = sparse(rows,cols,val);
     if nargout == 2
         diag = val(idiag);
     end
     
diff --git a/matlab/zmumps_mat.m b/matlab/zmumps_mat.m
index 2db15b7..4558886 100644
--- a/matlab/zmumps_mat.m
+++ b/matlab/zmumps_mat.m
@@ -1,44 +1,44 @@
 %
 % @file zmumps_mat.m
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 function [mata, diag] = zmumps_mat(file, dset)
     n=hdf5read(file,dset, 'RANK');
     Nnz=hdf5read(file,dset, 'NNZ');
     cols=hdf5read(file, strcat(dset,'/cols')); 
     irow=hdf5read(file, strcat(dset,'/irow')); 
     irn=hdf5read(file,  strcat(dset,'/mumps_par/IRN'));
     val=h5Complex_ll(file, strcat(dset,'/val')); 
     idiag=int32(find((irn-cols)==0));
     for i=1:n
         s = irow(i);
         e = irow(i+1)-1;
         rows(s:e) = i;
     end
     cols=double(cols);
     rows=double(rows);
     mata = sparse(rows,cols,val);
     if nargout == 2
         diag = val(idiag);
     end
diff --git a/multigrid/CMakeLists.txt b/multigrid/CMakeLists.txt
index 27779d3..b99a423 100644
--- a/multigrid/CMakeLists.txt
+++ b/multigrid/CMakeLists.txt
@@ -1,32 +1,32 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 add_subdirectory(src)
 if(BSPLINES_EXAMPLES)
   add_subdirectory(wk)
 endif()
 
 #add_subdirectory(halpern)
diff --git a/multigrid/docs/Makefile b/multigrid/docs/Makefile
index e144969..d18fa33 100644
--- a/multigrid/docs/Makefile
+++ b/multigrid/docs/Makefile
@@ -1,52 +1,52 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 all:	multigrid.pdf multigrid_2d.pdf mg_gbs.pdf
 
 grid.eps: grid.tex
 	tex grid.tex
 	dvips -o grid.eps grid.dvi
 
 mg_gbs.dvi: grid.eps
 
 clean:
 	rm -f grid.eps *.dvi *.log *.aux *~ *.toc *.flc *.bbl *.blg *.out *~
 
 distclean: clean
 
 .SUFFIXES:
 .SUFFIXES: .tex .dvi .pdf
 
 %.dvi: %.tex
 	latex $<
 	@while ( grep "Rerun to get cross-references" \
 	         ${<:tex=log} > /dev/null ); do \
 	         latex $<; \
 	done
 	latex $<
 
 %.pdf: %.dvi
 	dvipdf $<
 
diff --git a/multigrid/docs/grid.tex b/multigrid/docs/grid.tex
index 13b90a0..40091d9 100644
--- a/multigrid/docs/grid.tex
+++ b/multigrid/docs/grid.tex
@@ -1,70 +1,70 @@
 %
 % @file grid.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 
 \hoffset=-2truecm
 \voffset=-2truecm
 \special{papersize=9cm,5cm}
 \hsize=9truecm
 \vsize=5truecm
 
 \parindent=0pt
 \nopagenumbers
 
 \input pstricks
 
 \pspicture(4,4)
   \psline[linestyle=solid](0,0)(8,0)
   \psline[linestyle=dotted](0,1)(8,1)
   \psline[linestyle=solid](0,2)(8,2)
   \psline[linestyle=dotted](0,3)(8,3)
   \psline[linestyle=solid](0,4)(8,4)
 
   \psline[linestyle=solid](0,0)(0,4)
   \psline[linestyle=dotted](1,0)(1,4)
   \psline[linestyle=solid](2,0)(2,4)
   \psline[linestyle=dotted](3,0)(3,4)
   \psline[linestyle=solid](4,0)(4,4)
   \psline[linestyle=dotted](5,0)(5,4)
   \psline[linestyle=solid](6,0)(6,4)
   \psline[linestyle=dotted](7,0)(7,4)
   \psline[linestyle=solid](8,0)(8,4)
 
   \psdots[dotstyle=square,dotscale=2](0,0)(2,0)(4,0)(6,0)(8,0)
   \psdots[dotstyle=square,dotscale=2](0,2)(2,2)(4,2)(6,2)(8,2)
   \psdots[dotstyle=square,dotscale=2](0,4)(2,4)(4,4)(6,4)(8,4)
 
   \psdots[dotstyle=*](0,0)(1,0)(2,0)(3,0)(4,0)(5,0)(6,0)(7,0)(8,0)
   \psdots[dotstyle=*](0,1)(1,1)(2,1)(3,1)(4,1)(5,1)(6,1)(7,1)(8,1)
   \psdots[dotstyle=*](0,2)(1,2)(2,2)(3,2)(4,2)(5,2)(6,2)(7,2)(8,2)
   \psdots[dotstyle=*](0,3)(1,3)(2,3)(3,3)(4,3)(5,3)(6,3)(7,3)(8,3)
   \psdots[dotstyle=*](0,4)(1,4)(2,4)(3,4)(4,4)(5,4)(6,4)(7,4)(8,4)
 
 \endpspicture
 
 
 \bye
 
 
diff --git a/multigrid/docs/mg_gbs.tex b/multigrid/docs/mg_gbs.tex
index 4c1eec0..4fff2b6 100644
--- a/multigrid/docs/mg_gbs.tex
+++ b/multigrid/docs/mg_gbs.tex
@@ -1,1623 +1,1623 @@
 %
 % @file mg_gbs.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{linuxdoc-sgml}
 \usepackage{graphicx}
 \usepackage{hyperref}
 %\usepackage{amsmath}
 \usepackage{amssymb}
 \usepackage{mathtools}
 \usepackage{placeins}
 \usepackage{multirow}
 \usepackage{latexsym}
 \usepackage{listings}
 \usepackage{xcolor}
 \usepackage{rotating}
 \def\RepFigures{FIGURES_mg_gbs}
 
 
 \title{\tt Multigrid Solver for GBS}
 \author{Trach-Minh Tran, Federico Halpern\\CRPP/EPFL}
 \date{v1.0, June 2015}
 
 \begin{document}
 \maketitle
 \tableofcontents
 
 \section{The PDE}
 The PDE considered is
 \begin{equation}
 \label{eq:pde}
   \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial
   x\partial y} + \frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y)  =
   f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y.
 \end{equation}
 On the four boundaries, homogeneous Dirichlet boundary condition
 $u=0$ as well as Neumann boundary condition
 $\partial u/\partial n=0$  can be applied.
 
 \section{Discretization}
 The grid points $(x_i,y_j)$ are defined by
 \begin{equation}
   \begin{split}
     x_i &= ih_x = i\frac{L_x}{N_x}, \quad i=0,\ldots, N_x \\
     y_j &= jh_y = j\frac{L_y}{N_y}, \quad j=0,\ldots, N_y \\
   \end{split}
 \end{equation}
 
 Second order Finite Difference discretization of Eq.\ref{eq:pde} leads
 to the following 9-point stencil
 
 \begin{equation}
 \label{eq:stencil}
   S_{ij} = \frac{1}{h_x^2}
   \begin{bmatrix}
     -\tau\alpha/4   & \alpha^2                  & \tau\alpha/4 \\
           1         & -2(1+\alpha^2)-h_x^2a_{ij} & 1 \\
     \tau\alpha/4    & \alpha^2                  &-\tau\alpha/4 \\
   \end{bmatrix}
 , \qquad \mbox{where $\alpha=h_x/h_y$}.
 \end{equation}
 
 Note that the mesh aspect ratio $\alpha$ results in the same stencil
 for the \emph{anisotropic} Poisson equation with $h_x=h_y$:
 \begin{equation}
    \frac{\partial^2 u}{\partial x^2} + \alpha^2
      \frac{\partial^2 u}{\partial y^2} = f.
 \end{equation}
 
 It is shown in \cite[p.~119]{Briggs} that this anisotropy can degrade
 the performance of multigrid using  standard relaxations such as
 Gauss-Seidel or damped Jacobi can be strongly degraded.
 
 With the \emph{lexicographic} numbering
 \begin{equation}
   I = j(N_x+1) + i+1, 
 \end{equation}
 for the $(N_x+1)(N_y+1)$ nodes, the discretized problem can be
 expressed as a matrix problem
 \begin{equation}
 \label{eq:matrix}
   \mathbf{Au} = \mathbf{f},
 \end{equation}
 where $\mathbf{A}$ is a 9-diagonal matrix,  assembled using the stencil defined
 above. Homogeneous Dirichlet boundary condition can be imposed, for example, on
 the face $j=0$ simply by \emph{clearing} the matrix rows and columns
 $1,2,\ldots, N_x+1$, and setting the diagonal terms to 1.
 
 Neumann boundary condition $\partial u/\partial x=0$ at
 the face $i=0$, can be simply implemented by imposing
 $u_{-1j}=u_{1j}$. The stencil for the boundary nodes $(0,j)$
 can thus be modified as
 
 \begin{equation}
   S_{0j} = \frac{1}{h_x^2}
   \begin{bmatrix}
      0 &\alpha^2                  & 0 \\
      0 &-2(1+\alpha^2)-h_x^2a_{0j} & 2 \\
      0 &\alpha^2                  & 0 \\
   \end{bmatrix}
 .
 \end{equation}
 
 Two model problems are considered in this report:
 \begin{description}
 \item[\texttt{\textbf{DDDD}} problem:] Homogeneous Dirichlet BC
   at all the 4 boundaries. The \emph{analytic solution} is
   \begin{equation}
     u(x,y) = \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
     \qquad \mbox{where $k_x$, $k_y$ are positive integers}.
   \end{equation}
 \item[\texttt{\textbf{NNDD}} problem:] Neumann boundary conditions
   $\partial u/\partial x=0$ at $x=0$ and $x=L_x$, homogeneous
   Dirichlet BC at $y=0$ and $y=L_y$. The \emph{analytic solution} is
   \begin{equation}
     u(x,y) = \cos\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
     \qquad \mbox{where $k_x$, $k_y$ are positive integers}.
   \end{equation}
 \end{description}
 In both problems, $a$ depends only on $x$:
 \begin{equation}
   \label{eq:density}
   a(x,y)= \exp\left[-\frac{(x-L_x/3)^2}{(L_x/2)^2}\right].
 \end{equation}
 
 The sparse direct solver MUMPS \cite{MUMPS} is used to solve (\ref{eq:matrix}) in
 order to check the convergence of the schema described
 above. Fig.\ref{fig:convergence} shows the expected \emph{quadratic}
 convergence of the error with respect to $h_x$ with fixed $\alpha=h_x/h_y=0.5$ for
 both problems, when the grid size is varied from $16\times 64$ to $512\times 2048$.
 
 \begin{figure}[hbt]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/convergence}
   \caption{Convergence of the error
     $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals
   in the $x$ direction $N_x$ for $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.}
   \label{fig:convergence}
 \end{figure}
 
 \section{Multigrid $V$-cycle}
 \label{sec-mgProc}
 Given an approximate  $\mathbf{u}^h$ and right hand side $\mathbf{f}^h$ 
 defined at some grid level  represented by the grid spacing $h$, the
 following MG $V$-cycle procedure
 \begin{equation*}
   \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{f}^h)}
 \end{equation*}
 will compute a \emph{new} $\mathbf{u}^h$. It is defined recursively by the
 following steps: 
 
 \begin{enumerate}
 \item If $h$ is the coarsest mesh size, 
   \begin{itemize}
   \item  Direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{f}^h$ 
   \item Goto 3.
   \end{itemize}
 \item Else 
   \begin{itemize}
     \item Relax $\mathbf{u}^h$ $\nu_1$ times.
     \item $\mathbf{f}^{2h} \leftarrow {\mathbf{R}}(\mathbf{f}^h-\mathbf{A}^h\mathbf{u}^h)$.
     \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{f}^{2h})$ $\mu$ times.
     \item $\mathbf{u}^h\leftarrow
       \mathbf{u}^h+{\mathbf{P}}\mathbf{u}^{2h}$.
     \item Relax $\mathbf{u}^h$ $\nu_2$ times.
     \end{itemize} 
 \item Return
 \end{enumerate}
 
 In the procedure above, the operators $\mathbf{R}$ and $\mathbf{P}$
 denote respectively the \emph{restriction} (from \emph{fine} to
 \emph{coarse} grid) and the \emph{prolongation} (from \emph{coarse} to
 \emph{fine} grid). Notice that in this multigrid procedure,
 $\mathbf{R}$ applies only to the \emph{right hand side} while $\mathbf{P}$
 applies only to the \emph{solution}. The standard $V(\nu_1,\nu_2)$ cycle is obtained by
 calling this $MG^h$ procedure with $\mathbf{f}^h$ defined at the
 \emph{finest} grid level, a guess $\mathbf{u}^h=0$ and $\mu=1$, while 
 $\mu=2$ results in the $W(\nu_1,\nu_2)$ cycle.
 
 Details on the grid coarsening, the inter-grid transfers and methods
 of relaxation are given in the following.
 
 \subsection{Grid coarsening}
 Let start with the one-dimensional \emph{fine} grid defined
 by $x_i,\, i=0,\ldots,N$, assuming that $N$ is even. The next coarse grid
 (with $N/2$ intervals) is obtained by simply discarding the grid
 points with \emph{odd} indices. 
 
 In order to get a \emph{smallest coarsest} grid (so that it is possible
 to solve \emph{cheaply} the problem with a \emph{direct} method), $N$ should be
 $N=N_c2^{L-1}$ where $L$ the total number of grid levels and $N_c$ is either
 2 or a \emph{small odd} integer. As an example, the fine grid with $N=768$ can have
 up to 9 grid levels, and a coarsest grid with 3 intervals, see
 Table~\ref{tab:level}.
 
 \begin{table}[hbt]
 \centering
 \begin{tabular}{|l||r|r|r|r|r|}\hline
 $L$ & \multicolumn{5}{c|}{$N$} \\ \hline
   1 &    2 &    3 &    5 &    7 &    9\\
   2 &    4 &    6 &   10 &   14 &   18\\
   3 &    8 &   12 &   20 &   28 &   36\\
   4 &   16 &   24 &   40 &   56 &   72\\
   5 &   32 &   48 &   80 &  112 &  144\\
   6 &   64 &   96 &  160 &  224 &  288\\
   7 &  128 &  192 &  320 &  448 &  576\\
   8 &  256 &  384 &  640 &  896 & 1152\\
   9 &  512 &  768 & 1280 & 1792 & 2304\\
  10 & 1024 & 1536 & 2560 & 3584 & 4608\\
 \hline
 \end{tabular}
 \caption{Set of values of the \emph{fine} grid number of intervals $N$
   to obtain a \emph{coarsest} grid size at most equal to $9$ with
   at most $10$ grid levels.}
 \label{tab:level}
 \end{table}
 
 For a two-dimensional grid, the same procedure is applied to both
 dimensions. The result of such procedure is illustrated in
 Fig.~\ref{fig:2d_coarsening}, for a $8\times4$ fine grid.
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.6\hsize]{grid}
   \caption{A \emph{coarse} $4\times 2$ grid ($\Box$) obtained from a
     $8\times4$ fine grid ($\bullet$).}
   \label{fig:2d_coarsening}
 \end{figure}
 
 
 \subsection{Inter-grid transfers}
 The one-dimensional \emph{prolongation} operator for the second-order FD
 discretization is chosen the same as the one obtained with the
 \emph{linear Finite Elements} \cite{MG1D}. For a $N=8$ grid, it can be
 represented as a $9\times 5$ matrix given by
 \begin{equation}
   \label{eq:1dprolongation}
   \mathbf{P} =
   \left(
   \begin{matrix}
     1   & 0   & 0   & 0   & 0  \\
     1/2 & 1/2 & 0   & 0   & 0  \\
     0   & 1   & 0   & 0   & 0  \\
     0   & 1/2 & 1/2 & 0   & 0  \\
     0   & 0   & 1   & 0   & 0  \\
     0   & 0   & 1/2 & 1/2 & 0  \\
     0   & 0   & 0   & 1   & 0  \\
     0   & 0   & 0   & 1/2 & 1/2\\
     0   & 0   & 0   & 0   & 1   \\
   \end{matrix}\right)
 \end{equation}
 The \emph{restriction} matrix $\mathbf{R}$ is simply related to $\mathbf{P}$ by
 \begin{equation}
   \label{eq:1drestriction}
   \mathbf{R} = \frac{1}{2}\mathbf{P}^{T}=\frac{1}{2}\left(
   \begin{matrix}
     1&1/2&0&0&0&0&0&0&0\\
     0&1/2&1&1/2&0&0&0&0&0\\
     0&0&0&1/2&1&1/2&0&0&0\\
     0&0&0&0&0&1/2&1&1/2&0\\
     0&0&0&0&0&0&0&1/2&1\\
   \end{matrix}
   \right).
 \end{equation}
 For Dirichlet BC imposed on the \emph{left} boundary one has to set
 ${P}_{21}=R_{12}=0$, while for Dirichlet BC imposed on the \emph{right}
 boundary, ${P}_{N,N/2+1}=R_{N/2+1,N}=0$. Notice that these inter-grid operators are
 identical to the standard \emph{linear interpolation} and \emph{full
   weighting} operators.
 
 For a two-dimensional problem, using the property that the grid is a
 \emph{tensor product} of two one-dimensional grids, the
 restriction of the right hand side $f^{h}_{ij}$ and the prolongation of
 the solution $u^{2h}_{ij}$ can be computed as
 \begin{equation}
   \label{eq:2dintergrid}
   \begin{split}
     \mathbf{f}^{2h} &= \mathbf{R}^x \cdot \mathbf{f}^{h}\cdot (\mathbf{R}^y)^T \\     
     \mathbf{u}^{h}  &= \mathbf{\mathbf{P}}^x \cdot \mathbf{u}^{2h}\cdot
     (\mathbf{\mathbf{P}}^y)^T \\     
   \end{split}
 \end{equation}
 
 \subsection{Relaxations}
 Gauss-Seidel and damped Jacobi iterations are used as relaxation
 methods in the multigrid $V$ cycle. In general, Gauss-Seidel
 method is  more efficient but much more difficult to
 \emph{parallelize} than the Jacobi method.
 
 It should be noted that if $a(x,y)$ in Eq.~\ref{eq:pde} is
 non-positive, both relaxations diverge! This can be seen by
 considering the following one-dimensional FD equation with uniform
 $a$:
 
 \begin{equation}
   u_{j-1} -(2+ah^2)u_j + u_{j+1} = h^2f_j.
 \end{equation}
 
 Using the damped Jacobi relaxation, the error
 $\epsilon^{(m)}_j\equiv u_{anal}(x_j)-u_j^{(m)}$ at iteration $m+1$ is given by
 
 \begin{equation}
   \epsilon^{(m+1)}_j =
   \frac{\omega}{2+h^2a}(\epsilon^{(m)}_{j-1}+\epsilon^{(m)}_{j+1})
     +(1-\omega)\epsilon^{(m)}_j .
 \end{equation}
 Performing a \emph{local mode analysis} (or Fourier analysis) (see
 \cite[p.~48]{Briggs}), assuming that
 $\epsilon^{(m)}_j=A(m)e^{ij\theta}$, where $\theta$ is related to the
 mode number $k$ by $\theta=2\pi k/N$, the 
 \emph{amplification factor} $G(\theta)$ is obtained as
 \begin{equation}
   \begin{split}
     G(\theta) &= \frac{A(m+1)}{A(m)} =
     \frac{2\omega}{2+h^2a}\cos\theta + (1-\omega) \\
     &= G_0(\theta) -\frac{ \omega h^2a}{2+h^2a}\cos\theta 
     \simeq  G_0(\theta) -\frac{ \omega h^2a}{2}\cos\theta, \\
  G_0(\theta) &=1-2\omega\sin^2\frac{\theta}{2},  \\
   \end{split}
 \end{equation}
 where $G_0(\theta)$ is the amplification factor for
 $a=0$. Note that $|G_0(\theta)|< 1$ for \emph{all} $\theta$ and $0<\omega< 1$
 but $\displaystyle{\max_{|\theta|<\pi}|G(\theta)|>1}$ if $a<0$.
 
 In Gauss-Seidel relaxation method, the errors evolve as:
 \begin{equation}
   \epsilon^{(m+1)}_j = \frac{\epsilon^{(m+1)}_{j-1}+\epsilon^{(m)}_{j+1}}{2+h^2a}.
 \end{equation}
 Applying again the same Fourier analysis yields the
 following complex amplification factor:
 \begin{equation}
   \begin{split}
     G(\theta) &\simeq G_0(\theta)\left(1-\frac{h^2a}{2-e^{-i\theta}}\right) \\
     G_0(\theta) &=\frac{e^{i\theta}}{2-e^{-i\theta}}, \quad
     |G_0(\theta)| < 1
   \end{split}
 \end{equation}
 which show that the Gauss-Seidel relaxations \emph{diverge} if $a<0$.
 
 Notice that when $a>0$, the effect of $a$ on the amplification is
 negligible and is thus ignored in the following two-dimensional
 analysis. Applying the damped Jacobi scheme on the
 stencil~(\ref{eq:stencil}), the error at the iteration $m+1$ is given
 by:
 \begin{equation}
   \begin{split}
   \epsilon^{(m+1)}_{ij} = & \frac{\omega}{2(1+\alpha^2)} \left[
     \epsilon^{(m)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2(
     \epsilon^{(m)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}(
     \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m)}_{i-1,j-1} -
     \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m)}_{i+1,j-1})
     \right] \\
   & + (1-\omega)\epsilon^{(m)}_{ij}. \\
   \end{split}
 \end{equation}
 Using the two-dimensional Fourier mode expression
 \begin{equation}
   \epsilon^{(m)}_{ij} = A(m)e^{i(\theta_1+\theta_2)}, \quad -\pi <\theta_1,
   \theta_2 \le \pi,
 \end{equation}
 the amplification factor $G=A(m+1)/A(m)$ is given
 by
 \begin{equation}
 \label{eq:amp_jac}
   G(\theta_1,\theta_2;\omega,\alpha,\tau)=1 - \frac{2\omega}{1+\alpha^2} \left(
     \sin^2\frac{\theta_1}{2} +  \alpha^2\sin^2\frac{\theta_2}{2} +
     \frac{\tau\alpha}{4}\sin\theta_1\,\sin\theta_2
     \right).
 \end{equation}
 
 The errors in Gauss-Seidel method, assuming a \emph{lexicographic} ordering
 for the unknowns (increasing first $i$ then $j$), are updated according
 to
 \begin{equation}
   \epsilon^{(m+1)}_{ij} =  \frac{1}{2(1+\alpha^2)} \left[
     \epsilon^{(m+1)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2(
     \epsilon^{(m+1)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}(
     \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m+1)}_{i-1,j-1} -
     \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m+1)}_{i+1,j-1})
     \right].
 \end{equation}
 The Fourier mode analysis then leads to the following complex
 amplification factor
 \begin{equation}
 \label{eq:amp_gs}
    G(\theta_1,\theta_2;\alpha,\tau) =
    \frac{e^{i\theta_1} + 
      \left(\alpha^2+i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{i\theta_2}}
    {2(1+\alpha^2) - \left[e^{-i\theta_1}+\left(\alpha^2 -
      i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{-i\theta_2}\right]}.
 \end{equation}
 
 Curves of $G$ for \emph{fixed} $\theta_2$ are plotted in
 Fig.~\ref{fig:fourier_jac} showing \emph{convergence} ($\max|G|<1$) for
 $\tau=-1,0,1,2$, using the damped Jacobi method. The 
 same conclusions are obtained for Gauss-Seidel relaxations as shown
 in Fig.~\ref{fig:fourier_gs} where the absolute values 
 of the complex amplification factor $G$ are plotted. However, for larger
 $|\tau|>2$, both methods diverge as can be seen in
 Fig.~\ref{fig:relax_diverge}.Notice however that the PDE
 (\ref{eq:pde}) is \emph{elliptic} only when $|\tau|<2$ is satisfied!
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.85\hsize]{\RepFigures/fourier_jac}
   \caption{Amplification factor for damped Jacobi relaxations with
     $\omega=0.8$ and $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$ displayed as curves of
     constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
   the \emph{red} curve.}
   \label{fig:fourier_jac}
 \end{figure}
 
 \begin{figure}[hbt]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/fourier_gs}
   \caption{Absolute value of the amplification factor for Gauss-Seidel
     relaxations with $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$, displayed as curves of
     constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
   the \emph{red} curve.}
   \label{fig:fourier_gs}
 \end{figure}
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/relax_diverge}
   \caption{Amplification factor for Jacobi (left) and Gauss-Seidel (right)
     relaxations for $|\tau|=3,5$, $\alpha=h_x/h_y=1$, displayed as curves of
     constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
   the \emph{red} curve.}
   \label{fig:relax_diverge}
 \end{figure}
 
 In summary, the local mode analysis predicts that
 \begin{itemize}
 \item Negative values of the coefficient $a$ and large
   mixed derivative ($|\tau| > 2$) can make both damped Jacobi and
   Gauss-Seidel relaxations diverge.
 \item Positive values of $a$ can decrease the amplification factor
   (improving thus the convergence rate) but
   its contributions $h^2a$ decrease for increasing grid resolution.
 \end{itemize}
 These predictions will be checked against numerical experiments in the
 next section.
 
 \FloatBarrier
 \section{Numerical Experiments}
 \label{sec:NumExp1}
 In the following numerical experiments, we look at the convergence
 rate of the residual norm and the error norm which are defined at the
 iteration $m$ by
 \begin{equation}
   \begin{split}
     r^{(m)} &= \|\mathbf{f}-\mathbf{A}\mathbf{u}^{(m)}\|_\infty, \\
     e^{(m)} &= \|\mathbf{u}^{(m)}-\mathbf{u}_{anal}\|_\infty.\\
   \end{split}
 \end{equation}
 
 The iterations are stopped when the number of iterations reach an
 user supplied \emph{maximum} of iterations or when the residual norm
 is smaller than either a given \emph{relative} tolerance {\tt rtol}
 \cite[p.~51]{TEMPL} or \emph{absolute} tolerance {\tt atol}:
 \begin{equation}
   \begin{split}
     r^{(m)} &< \mbox{\tt rtol}
       \cdot(\|\mathbf{A}\|_\infty\cdot\|\mathbf{u}^{(m)}\|_\infty + 
       \|\mathbf{f}\|_\infty), \\
     r^{(m)} &<  \mbox{\tt atol}.\\
   \end{split}
 \end{equation}
 
 An additional stopping criteria consists of stopping the
 iterations when the change of the discretization error between
 successive iteration is small enough:
 \begin{equation}
   \frac{e^{(m)}-e^{(m-1)}}{e^{(m-1)}}<    \mbox{\tt etol}.
 \end{equation}
 
 \subsection{$V$-cycle performances}
 Table~\ref{tab:iternum} shows the numbers of $V$-cycles required to
 reach the \emph{relative tolerance} 
 $\mbox{\tt rtol}=10^{-8}$. In these runs where $\alpha=0.5$, $\tau=1$
 and $a(x,y)$ given by Eq.~\ref{eq:density}, we observe that the
 biggest improvement is obtained at $\nu_1=\nu_2=2$. For larger
 $\nu_1,\nu_2$, the number of required iterations is relatively insensitive 
 to the grid sizes. As can be seen in Fig.~\ref{fig:mg_iterations}
 which plots the evolution of the error $e^{(m)}$, it
 is clear that the level of discretization error has been largely
 reached. Finally the times used by these runs are shown in
 Fig.~\ref{fig:dddd_pc220} and Fig.~\ref{fig:nndd_pc220} where the
 times spent in the direct solver using MUMPS \cite{MUMPS} are included
 for comparison. For the $512\times 2048$ grid (the largest
 case using the direct solver), the multigrid $V(3,3)$ is about $30$
 times faster!
 
 \begin{table}[htb]
 \centering
 \begin{tabular}{|l||c|c|c|c||c|c|c|c|}\hline
    & \multicolumn{4}{c||}{\texttt{\textbf{DDDD}} problem}          
    & \multicolumn{4}{c|}{\texttt{\textbf{NNDD}} problem} \\ \cline{2-9}
 Grid size  & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$
            & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ \\ \hline
     $16\times 64$  &  3 &  2 &   2 &  1 &   4 &   2 &   2 &   1\\
     $32\times 128$ &  5 &  3 &   2 &  2 &   5 &   3 &   2 &   2\\
     $64\times 256$ &  7 &  4 &   3 &  3 &   7 &   4 &   3 &   3\\
    $128\times 512$ & 10 &  6 &   4 &  4 &  10 &   6 &   5 &   4\\
   $256\times 1024$ & 11 &  6 &   5 &  4 &  11 &   6 &   5 &   4\\
   $512\times 2048$ & 11 &  6 &   5 &  4 &  11 &   6 &   5 &   4\\
  $1024\times 4096$ & 10 &  6 &   4 &  4 &  10 &   6 &   4 &   4\\
  $1536\times 6144$ &  9 &  6 &   4 &  4 &   9 &   5 &   4 &   3\\
 \hline
 \end{tabular}
 \caption{Multigrid $V$-cycle results for the {\tt DDDD} and {\tt NNDD}
   model problems with $k_x=k_y=4$, $L_x=100$, $L_y=800$, $\tau=1$ and
   $a(x,y)$ given by Eq.~\ref{eq:density}. Shown are the numbers of
   multigrid  $V$-cycles required to reduce the \emph{relative}
   residual norm to less than $10^{-8}$ for different
   grid sizes and numbers of pre and post relaxation
   sweeps. Gauss-Seidel relaxation is used. The coarsest grid size of the
   $1536\times 6144$ case is $3\times 12$ while all the others have a coarsest
   grid of size $2\times 8$.} 
 \label{tab:iternum}
 \end{table}
  
 \begin{figure}[htb!]
   \centering
   \includegraphics[width=\textwidth]{\RepFigures/mg_iterations}
   \caption{Performance of the $V(2,2)$-cycle using the Gauss-Seidel
     relaxation scheme for the {\tt DDDD} (upper curve) and {\tt NNDD}
     (lower curve) problem. The relative tolerance {\tt rtol} is
     set to $10^{-8}$ the coarsest grid size for all the problem size
     is fixed to $2\times 8$.}
   \label{fig:mg_iterations}
 \end{figure}
 
 The fittings of the obtained data show that the multigrid
 $V$ cycle cost scales almost \emph{linearly} with the number of
 unknowns $N=(N_x+1)(N_y+1)$ (as does the backsolve stage of MUMPS)
 while the \emph{total} direct solve time scales as $N^{1.4}$.
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/dddd_pc220}
   \caption{Times used by the multigrid $V$ cycles for the runs reported
     in Table~\ref{tab:iternum} for the \texttt{\textbf{DDDD}}
     problem. The last 6 $V(3,3)$ data points are used for the multigrid fit. The
     MUMPS direct solver's cost is included for comparison. All the
     timing results are obtained on an Intel Nehalem i7 processor, using the
     Intel compiler version 13.0.1 and MUMPS-4.10.0. } 
   \label{fig:dddd_pc220}
 \end{figure}
 
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_pc220}
   \caption{As in Fig.~\ref{fig:dddd_pc220} for the
     \texttt{\textbf{NNDD}} problem.}
   \label{fig:nndd_pc220}
 \end{figure}
 
 \FloatBarrier
 \subsection{Effects of the mesh aspect ratio $\alpha$}
 From Table~\ref{tab:anisotropy}, one can observe that the required number of
 $V(2,2)$ cycles increase quickly when $\alpha<0.5$ and $\alpha>2$. Advanced
 \emph{relaxation} methods and \emph{coarsening} strategies
 \cite[chap. 7]{Wesseling} can solve this performance degradation
 but are generally more difficult to parallelize.
 
 \begin{table}[htb]
 \centering
 \begin{tabular}{|l||c|c|c|}\hline
   $\alpha$  & \texttt{\textbf{DDDD}} & \texttt{\textbf{NNDD}} \\ \hline
    0.125    & 19  & 22 \\
    0.25     & 12  & 12 \\
    0.5      &  6  &  6 \\
    1.0      &  5  &  5 \\
    2.0      &  7  &  7 \\
    4.0      & 20  & 19 \\
 \hline
 \end{tabular}
 \caption{Effects of the \emph{mesh aspect ratio} $\alpha=h_x/h_y$ on the
   number of $V(2,2)$ cycles required to reach $\mbox{\tt
     rtol}=10^{-8}$ for {\tt DDDD} and {\tt NNDD}
   model problems. The listed $\alpha$'s are obtained by fixing
   $N_x=256$, $N_y=1024$, $L_x=100$ and varying $L_y$.}
 \label{tab:anisotropy}
 \end{table}
 
 \subsection{Effects of the mixed partial derivative}
 When $|\tau|>2$, Table~\ref{tab:mixedterm} shows that the multigrid
 $V$-cycle  diverge, as predicted from the local mode analysis based on
 the amplification factor given in Eq.~\ref{eq:amp_gs}. Although, a
 non-negative coefficient $a$ has a \emph{stabilizing} effect, the
 latter disappears already for a $256\times 1024$ grid.
 
 \begin{table}[htb]
 \centering
 \begin{tabular}{|l|l|c|c|c|c|c|c|c|c|}\hline
   &Grid size  & $\tau=-3$ & $\tau=-2$ & $\tau=-1$ & $\tau=0$ & $\tau=1$
   & $\tau=2$ & $\tau=3$  \\ \hline
 \multirow{4}{*}{\texttt{\textbf{DDDD}}}
 &$128\times 512(a=0)$ & - & 39 &  7 & 5 & 7 & 38 & -  \\
 &    $128\times 512$  &16 &  6 &  5 & 4 & 4 &  6 & 17 \\
 &   $256\times 1024$  &-  &  8 &  5 & 4 & 5 &  7 &  - \\
 &   $512\times 2048$  &-  &  9 &  5 & 4 & 5 &  9 &  - \\ 
 \hline\hline
 \multirow{4}{*}{\texttt{\textbf{NNDD}}}
 &$128\times 512(a=0)$ & - & 42 &  7 & 5 & 7 & 41 & -  \\
 &    $128\times 512$  &13 &  6 &  5 & 4 & 5 &  5 & 13 \\
 &   $256\times 1024$  &-  &  7 &  5 & 4 & 5 &  7 &  - \\
 &   $512\times 2048$  &-  &  7 &  5 & 4 & 5 &  7 &  - \\ 
 \hline
 \end{tabular}
 \caption{Effects of the mixed derivative term $\tau$ on the
   performances of the  $V(3,3)$ cycle. The dashes
   indicate that the $V$-cycle diverges. In theses runs, $a(x,y)$ is given by
   Eq.~\ref{eq:density} except for the cases where it is set to
   0. Notice the  \emph{stabilizing} effect of $a\ne 0$ for the
   $128\times 512$ grid at $\tau = \pm 3$.}
 \label{tab:mixedterm}
 \end{table}
 
 \subsection{Using the damped Jacobi relaxation}
 The optimum Jacobi damping factor $\omega$ can be determined by minimizing
 the \emph{smoothing factor} defined as the maximum amplification
 coefficient (\ref{eq:amp_jac}) restricted to the \emph{oscillatory modes}:
 \begin{equation}
   \label{eq:mu_jac}
   \mu(\omega,\alpha,\tau) = \max_{(\theta_1,\theta_2)\in\Omega}
   |G(\theta_1,\theta_2,\omega,\alpha,\tau)|, \qquad \Omega =
   [|\theta_1|>\pi/2]\,\bigcup\,[|\theta_2|>\pi/2].
 \end{equation}
 Results from numerical computation of (\ref{eq:mu_jac} are shown
 in Fig.~\ref{fig:jac_opt}. An analytic expression for $\tau=0$
 assuming $\alpha\le 1$ is derived in \cite[p.~119]{salpha}:
 \begin{gather}
   \mu(\omega,\alpha,\tau=0) =
   \max\left(\left|1-2\omega\right|,\;\left|1-\frac{\alpha^2}{1+\alpha^2}\omega\right|\right),
   \nonumber\\    
 \mu_{\mbox{opt}} = \frac{2+\alpha^2}{2+3\alpha^2} \quad
 \mbox{at} \quad \omega_{\mbox{opt}} = \frac{2+2\alpha^2}{2+3\alpha^2}.
 \end{gather}
 Notice that the smoothing factor increases as $\alpha$ departs from 1
 and for increasing $\tau$.
 
 For Gauss-Seidel relaxation, the same numerical procedure applied to
 (\ref{eq:amp_gs}) yields a smoothing factor $\mu$ equal to
 respectively $0.5$, $ 0.68$ and $0.70$ for the three cases shown in
 Fig.~\ref{fig:jac_opt}, which result in a better smoothing property
 than the damped Jacobi relaxation.
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/jac_opt}
   \caption{The smoothing factor for damped Jacobi relaxation
     for different values of $\alpha$ and $\tau$.}
   \label{fig:jac_opt}
 \end{figure}
 
 Numerical experiments with the reference case ($\alpha=0.5$, $\tau=1$,
 $a(x,y)$ given by Eq.~\ref{eq:density}) and the $128\times 512$ grid
 using damped Jacobi relaxation, are shown in Table~\ref{tab:jac_opt}
 and confirm that $\omega=0.9$ is the optimum damping factor and that
 it is less efficient than the Gauss-Seidel relaxation, in agreement
 with the Fourier analysis.
 
 \begin{table}[htb]
 \centering
 \begin{tabular}{l c c c c c c}\hline
 &$\omega=0.5$ &$\omega=0.6$ &$\omega=0.7$ &$\omega=0.8$ &$\omega=0.9$
   &$\omega=1.0$ \\ \cline{2-7}
 \texttt{\textbf{DDDD}}& 12 & 10 & 9 & 8 & 7 & 15 \\
 \texttt{\textbf{NNDD}}& 12 & 11 & 9 & 8 & 7 & 18 \\
 \hline
 \end{tabular}
 \caption{The number of $V(3,3)$ cycles required to obtain
   $\mbox{\texttt{rtol}}=10^{-8}$ versus the Jacobi \emph{damped
     factor} $\omega$. The grid size is $128\times 512$ with
   $\alpha=0.5$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}.}
 \label{tab:jac_opt}
 \end{table}
 
 \subsection{Matrix storage}
 Initially the \emph{Compressed Sparse Row} storage format (CSR or CRS) (see
 \cite[p.~58--59]{TEMPL}) was used to store the discretized finite
 difference matrix. With this choice, the CPU time used by the matrix
 construction (and boundary condition setting) is found to be always larger
 than the multigrid solver time as shown in
 Fig.~\ref{fig:matcon_time}. Fortunately, switching to the \emph{Compressed
   Diagonal Storage} (CDS), where the 9 diagonal structure of the
 matrix is fully exploited, the matrix construction time is considerably
 reduced as shown in the same figure. On the other hand, no
 difference in the multigrid solver performance is noticeable
 between the two matrix storage.
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/matcon_time}
   \caption{CPU time used by the matrix construction for CSR and CDS
     matrix storage compared to the multigrid $V(3,3)$ cycle time for
     the \textbf{DDDD} and  \textbf{NNDD} model problems. The timing is
   obtained using the same conditions as in Fig.~\ref{fig:dddd_pc220}.}
   \label{fig:matcon_time}
 \end{figure}
 
 \FloatBarrier
 
 \section{Modified PDE}
 Here, the following modified PDE is considered:
 \begin{equation}
 \label{eq:new_pde}
   \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial
   x\partial y} + (1+\tau^2/4)\frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y)  =
   f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y.
 \end{equation}
 This PDE which is obtained from the $\hat s-\alpha$ model
 \cite[Eq.10]{salpha} is
 \emph{elliptic} for any value of $\tau$. The resulting stencil is
 changed from the previous stencil \ref{eq:stencil} to
 \begin{equation}
 \label{eq:new_stencil}
   S_{ij} = \frac{1}{h_x^2}
   \begin{bmatrix}
     -\tau\alpha/4   & \alpha^2(1+\tau^2/4)                  & \tau\alpha/4 \\
           1         & -2\left[1+\alpha^2(1+\tau^2/4)\right]-h_x^2a_{ij} & 1 \\
     \tau\alpha/4    & \alpha^2(1+\tau^2/4)                   &-\tau\alpha/4 \\
   \end{bmatrix}
 .
 \end{equation}
 
 Note that the
 \emph{anisotropy} of the resulting Finite Difference discretization
 is now $\alpha^2(1+\tau^2/4)$ and could be controlled by adjusting both
 the mesh aspect ratio $\alpha$ and the \emph{shear} term $\tau$.
 
 Numerical calculations show that the multigrid $V$-cycles do always
 converge, as shown in Table~\ref{tab:new_mixedterm}.
 
 \begin{table}[htb]
 \centering
 \begin{tabular}{|l|l|c|c|c|c|c|c|c|}\hline
   &Grid size  & $\tau=0$ & $\tau=1$ & $\tau=2$ & $\tau=4$ & $\tau=8$
   & $\tau=16$  \\ \hline
 \multirow{3}{*}{\texttt{\textbf{DDDD}}}
 &    $128\times 512$  &4 & 4 &  5 & 6 & 9  & 20 (6) \\
 &   $256\times 1024$  &4 & 4 &  5 & 6 & 11 & 25 (8)\\
 &   $512\times 2048$  &4 & 4 &  5 & 7 & 12 & 29 (8)\\ 
 \hline\hline
 \multirow{3}{*}{\texttt{\textbf{NNDD}}}
 &    $128\times 512$  &4 &   4 &  4 & 5 & 8 &  17 (5) \\
 &   $256\times 1024$  &4  &  5 &  5 & 5 & 8 &  19 (6) \\
 &   $512\times 2048$  &4  &  4 &  4 & 5 & 7 &  18 (6) \\ 
 \hline
 \end{tabular}
 \caption{Effects of the mixed derivative term $\tau$ on the
   performances of the  $V(3,3)$ cycle. In theses runs, $a(x,y)$ is
   given by Eq.~\ref{eq:density}. The mesh aspect ratio $\alpha=0.5$
   was used. On the last column and shown in parenthesis are the
   numbers of $V(3,3)$ cycles when $\alpha$ is reduced to 0.125 by
   increasing the length $L_y$ while keeping all the other values fixed.}
 \label{tab:new_mixedterm}
 \end{table}
 
 \section{Parallel Multigrid}
 In order to maximize the parallel efficiency and the flexibility of
 utilization, a two-dimensional domain partition scheme is chosen to
 parallelize the multigrid solver. As shown below, generalization of this
 procedure for higher dimensions is straightforward.
 
 \subsection{Distributed grid coarsening}
 The coarsening algorithm can be summarized as follow:
 \begin{itemize}
 \item Partition the grid points on each dimension at the
   \emph{finest} grid level, as evenly as possible.
 \item The range for each sub-grid, using \emph{global indexing} is
   thus specified by $[s,e]$, with $s=0$ for the first sub-grid and
   $e=N$ for the last sub-grid, $N$ being the number of grid intervals.
 \item The next coarse sub-grid is thus obtained by discarding all the
   \emph{odd} indexed grid points, as in the serial case.
 \item This process can continue (as long as the total of number of
   intervals is even) until there exists a prescribed \emph{minimum}
   number of grid points on any sub-grid is reached.
 \end{itemize}
 
 \subsection{Matrix-free formulation}
 Using standard \emph{matrix} to represent the discretized 2D (or higher
 dimensional) operators imply an \emph{one-dimensional numbering} of
 the grid nodes. For example on a 2D $N_x\times N_y$ grid, the 1D numbering
 of the node $(x_{i_1},y_{i_2})$ could be defined as
 \[
  k=i_1+i_2\times N_x, \quad i_1=0:N_x,\;i_2=0:N_y.
 \]
 However, using 2D domain partition defined by
 \begin{equation}
   \label{eq:2dnumber}
   i_1=s_1:e_1,\;i_2=s_2:e_2,
 \end{equation}
 with $s=(s_1,s_2)$ and $e=(e_1,e_2)$ denoting respectively the
 \emph{starting} and \emph{ending} indices of a rectangular sub-domain,
 result in a \emph{non-contiguous} set of the indices ${k}$ and in a
 complicate structure of the partitioned matrix for the linear
 operator.
 
 On the other hand, using the \emph{stencil notation}
 introduced in \cite[chap. 5.2]{Wesseling} based on the
 \emph{multidimensional} node labeling as defined by
 (\ref{eq:2dnumber}) for a 2D problem, one can define a simple data
 structure for the partitioned operator, $A(i,\delta)$,
 where the $d$-tuple $i=(i_1,\ldots,i_d)$ represents a node of the
 $d$-dimensional grid and the $d$-tuple
 $\delta=(\delta_1,\ldots,\delta_d)$, the 
 \emph{distance} between the connected nodes. The result of
 $\mathbf{A}u$ can thus be defined as 
 \begin{equation}
   \label{eq:vmx}
   (\mathbf{A}u)_i = \sum_{\delta\in\mathbb{Z}^d}
   A(i,\delta)u_{i+\delta}, \quad i=s:e. 
 \end{equation}
 In (\ref{eq:vmx}), the sum is performed over all
 indices $\delta$ such that $A(i,\delta)$ is non-zero. For the 2D 
 nine-point stencil defined in (\ref{eq:stencil}), the 2-tuple
 $\delta$ can be specified as the 9 columns of the following
 \emph{structure} matrix
 \begin{equation}
   \label{5points}
   S_\delta = \left(
   \begin{array}{rrrrrrrrr}
     0  & -1 & 0 & 1 & -1 & 1 & -1& 0 & 1 \\
     0  & -1 & -1&-1 &  0 & 0 & 1 & 1 & 1 \\
   \end{array} \right).
 \end{equation}
 In the general case of a $d$-dimensional grid and $\mathcal{N}$ point stencil,
 $S_\delta$ is a $d\times\mathcal{N}$
 matrix. By noting that the subscript $i+\delta$ of $u$ on the right hand side of
 (\ref{eq:vmx}) should be in the range $[0,N]$ \emph{only} for sub-domains which are
 \emph{adjacent} to the boundary, one can deduce that for
 a \emph{fixed} $\delta$, the lower and upper bounds of the indices $i$
 should be 
 \begin{equation}
   \begin{split}
   i_{\mbox{min}} &= \max (0, -\delta, s), \\
   i_{\mbox{max}} &= \min (N, N-\delta, e) \\
   \end{split}
 \end{equation}
 where $N=(N_1,N_2,\ldots,N_d)$ specify the number of intervals,
 since, for sub-domains \emph{not adjacent} to the boundary, $u$ should
 include values at the \emph{ghost} cells $s-g$ and $e+g$ where $g$ is
 given by 
 \begin{equation}
   g = \max|S_\delta|
 \end{equation}
 with the operator max taken along the \emph{rows} of the matrix.
 The formula defined in (\ref{eq:vmx}) can then be implemented as in
 the \emph{pseudo} Fortran code
 
 \par
 \addvspace{\medskipamount}
 \nopagebreak\hrule
 \begin{lstlisting}[mathescape]
 do k=1,SIZE($S_{\delta}$,2)   ! loop over the stencil points
   $\delta$ = $S_{\delta}$(:,k)
   lb = MAX(0,-$\delta$,$s$)
   ub = MIN($N$,$N-\delta$,e)
   do i=lb,ub
     Au(i) = Au(i) + A(i,$\delta$)*u(i+$\delta$)
   enddo
 enddo
 \end{lstlisting} 
 \nopagebreak\hrule 
 \addvspace{\medskipamount}
 
 On the other hand, if the values of $u$ at the ghost cells of the
 sub-domains \emph{adjacent} to the boundary are set to 0
 \begin{equation*}
   u_{-g} = u_{N+g} = 0,
 \end{equation*}
 the lower and upper bounds of the
 inner loop can be simply set to $lb=s$ and $ub=e$. Note that the inner
 loop should be interpreted as $d$ nested loops over the $d$-tuple
 $i=(i_1,\ldots,i_d)$ for a $d$-dimensional problem.
 
 \subsection{Inter-grid transfers}
 
 \subsubsection{Restriction}
 Using the definition in the first equation of (\ref{eq:2dintergrid})
 together with (\ref{eq:1drestriction}), the 2D restriction operator
 can be represented by the following 9-point stencil:
 \begin{equation}
   \label{eq:2drestriction}
   \mathbf{R}_i = \frac{1}{16}
   \begin{pmatrix}
     1 & 2 & 1 \\
     2 & 4 & 2 \\
     1 & 2 & 1 \\
   \end{pmatrix},
 \end{equation}
 and the restriction of $f$ can be computed as
 \begin{equation}
   \bar{f}_i = (\mathbf{R}f)_i = \sum_{\delta\in\mathbb{Z}^2}
   R(i,\delta)f_{2i+\delta}, \quad i=\bar{s}:\bar{e},  
 \end{equation}
 where $\bar{s},\bar{e}$ denote the partitioned domain boundary indices on the
 \emph{coarse} grid, using the same algorithm described previously.
 
 \subsubsection{BC for the restriction operator}
 \label{sec:restrict_bc}
 Dirichlel boundary conditions can be imposed by modifying the
 \emph{restriction stencil} on each of the four boundaries as follow:
 \begin{equation}
   \mathbf{R}_{0,.} = \frac{1}{16}\begin{pmatrix}
     1 & 2 & 0 \\
     2 & 4 & 0 \\
     1 & 2 & 0 \\
   \end{pmatrix},\quad
   \mathbf{R}_{N_x,.} =  \frac{1}{16}\begin{pmatrix}
     0 & 2 & 1 \\
     0 & 4 & 2 \\
     0 & 2 & 1 \\
   \end{pmatrix},\quad
   \mathbf{R}_{.,0} =  \frac{1}{16}\begin{pmatrix}
     0 & 0 & 0 \\
     2 & 4 & 2 \\
     1 & 2 & 1 \\
   \end{pmatrix},\quad
   \mathbf{R}_{.,N_y} = \frac{1}{16}\begin{pmatrix}
     1 & 2 & 1 \\
     2 & 4 & 2 \\
     0 & 0 & 0 \\
   \end{pmatrix}.
 \end{equation}
 With the natural Neumann BC, no change of the restriction operator is needed.
 
 \subsubsection{Prolongation}
 Stencil notation for \emph{prolongation} operators is less obvious to
 formulate, see \cite[chap. 5.2]{Wesseling}. A more straightforward
 implementation is obtained in the 2D case, by simply applying
 \emph{bilinear interpolation} on the \emph{coarse grid}:
 \begin{equation}
   \label{eq:2dprolongation}
   \begin{split}
    (\mathbf{P}\bar{u})_{2i}   &= \bar{u}_{i}, \\
    (\mathbf{P}\bar{u})_{2i+e_1} &= (\bar{u}_{i} + \bar{u}_{i+e_1})/2, \quad
    (\mathbf{P}\bar{u})_{2i+e_2} = (\bar{u}_{i} + \bar{u}_{i+e_2})/2, \\
    (\mathbf{P}\bar{u})_{2i+e_1+e_2} &= (\bar{u}_{i} +
       \bar{u}_{i+e_1} + \bar{u}_{i+e_2} + \bar{u}_{i+e_1+e_2})/4, \\
   \end{split}
 \end{equation}
 
 \subsection{Relaxations}
 While the Gauss-Seidel proves to be more efficient, the damped Jacobi
 method, at least for a first version of the parallel multigrid solver,
 is used because it is straightforward to \emph{parallelize}. The same
 undamped Jacobi (with $\omega=1$) with a \emph{few}  number of 
 iterations is also used to solve the linear system at the coarsest
 mesh as prescribed by the multigrid $V$-cycle procedure defined in
 section \ref{sec-mgProc}. 
 
 \subsection{Local vectors and stencils}
 All local vectors (used to represent solutions or
 right-hand-sides) contain \emph{ghost cells} and are implemented
 using 2D arrays, for example
 \[\mbox{\texttt{sol(s(1)-1:e(1)+1,s(2)-1:e(2)+1)}}\]
 for the solution vector.
 
 The partitioned stencils are defined
 only for the \emph{local} grid points, without the ghost cells.
 Thus, before each operation on the local vectors, an exchange (or
 update) of the values on the ghost cells is performed.
 
 As a result, all the memory required by the solver is completely
 partitioned, except for the space used by the ghost cells.
 
 \subsection{Numerical Experiments}
 In this section, all the numerical experiments are conducted on
 \texttt{helios.iferc-csc.org}, using the Intel compiler version 13.1.3
 and bullxpmi-1.2.4.3. The \emph{stopping criteria} for the $V$-cycles
 is based on the absolute and relative residual norms as well as the
 discretization error norm as defined in section \ref{sec:NumExp1}. In
 cases where the analytic solution is not known, the latter can be
 replaced by some norm of the solution.
 
 \subsubsection{Strong scaling}
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_256x1024_DDD}
   \caption{DDDD problem for a $256\times 1024$ size, using multigrid
     $V(3,3)$ cycles. Different times for a given number of
     processes are obtained with different combinations of processes in
     each dimension. The number of grid levels are fixed to 6. Five Jacobi
     iterations are used at the coarsest grid.}
   \label{fig:strong_scal_small}
 \end{figure}
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_512x2048_DDD}
   \caption{DDDD problem for a $512\times 2048$ size  using multigrid
     $V(3,3)$ cycles. The \textcolor{red}{red marker} on the left shows
     the time for the serial multigrid solver. Different times for a
     given number of processes are obtained with different combinations
     of processes in each dimension. The number of grid levels are fixed
     to 6. Five Jacobi iterations are used at the coarsest grid.}
   \label{fig:strong_scal}
 \end{figure}
 
 Here 2 \emph{fixed} problem sizes are considered:
 \begin{itemize}
   \item A small size with the (fine) grid of $256\times 1024$
     shown in Fig.~\ref{fig:strong_scal_small} and
   \item a larger size of $512\times 2048$ in Fig.~\ref{fig:strong_scal}.
 \end{itemize}
 
 In both cases $\mbox{\tt rtol}=10^{-8}$ and $\mbox{\tt
   etol}=10^{-3}$. It was checked that the results do not change when more
 than 5 Jacobi iterations are used at the coarsest mesh. Notice that
 for the small problem, the parallel efficiency starts to degrade at 32
 MPI processes while for the larger case, this happens after 64 MPI
 processes. This can be explained by 
 the ghost cell exchange communication overhead: denoting $N_1$ and
 $N_2$, the number of grid points in each direction and $P_1$ and $P_2$
 the number of MPI processes in each direction, the ratio $S/V$ between the
 number of ghost points and interior grid points for each local
 subdomains can be estimated as 
 \begin{equation}
 \label{eq:comm_overhead}
   S/V\simeq\frac{2(N_1/P_1+N_2/P_2)}{N_1N_2/P_1P_2}  = 2\left(P_1/N_1+P_2/N_2\right). 
 \end{equation}
 This ratio increases as the number MPI processes increases while
 keeping the problem size fixed. On very coarse grids, this communication
 cost can become prohibitive. For this reason, in all the runs shown
 here, the number of grid points on each direction for the coarsest
 grid is limited to 2.
 
 \subsubsection{Weak Scaling}
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_DDDD}
   \caption{Weak scaling for a DDDD problem, using multigrid $V(3,3)$
     cycles. The number of grid levels are fixed to 7. The solver for the
     coarsest grid uses 5 Jacobi iterations except for the 2 largest cases
     which require respectively 20 and 100 iterations to  converge. The 2
     sets of curves on the right figure show respectively the timings with
     and without the calculations of the residual norm and discretization
     error which require both a \emph{global reduction}.}
   \label{fig:weak_scal_DDDD}  
 \end{figure}
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_NNDD}
   \caption{Weak scaling for a NNDD problem, using multigrid $V(3,3)$
     cycles. The number of grid levels are fixed to 7. The solver for the
     coarsest grid uses 5 Jacobi iterations except for the 2 largest cases
     which require respectively 20 and 100 iterations to  converge. The 2
     sets of curves on the right figure show respectively the timings with
     and without the calculations of the residual norm and discretization
     error which require both a \emph{global reduction}.}
   \label{fig:weak_scal_NNDD}  
 \end{figure}
 
 According to Eq.~\ref{eq:comm_overhead}, varying the problem size
 together with the number of MPI processes by keeping $N_1/P_1$ and
 $N_2/P_2$ constant should yield a \emph{constant scaling},
 provided that the convergence rate does not depend on the problem sizes.
 The results for the \texttt{DDDD} and \texttt{NNDD} problems are shown
 in Fig.~\ref{fig:weak_scal_DDDD} and
 Fig.~\ref{fig:weak_scal_NNDD}. The left part of the figures shows that
 the convergence rate depends only weakly on the problem sizes, which
 leads indeed to a (almost) constant time obtained for numbers of MPI
 processes $P$ between 16 and 1024 . The reason for the good timings
 for smaller $P$ is simply that there are only 2 ghost cell exchanges
 for $P=2\times 2$ (instead of 4 for $P\ge 16$) and that there is no
 exchange for $P=0$.
 
 \section{Non-homogeneous Boundary Conditions}
 
 \subsection{Non-homogeneous Dirichlet Conditions}
 Non-homogeneous Dirichlet boundary conditions can be imposed on all the
 Dirichlet faces simply by \emph{clearing}, as for the 
 \emph{homogeneous case}, the matrice rows and columns and setting its
 diagonal term to 1. Moreover, the corresponding corresponding
 \emph{right-hand-side} should be set to:
 \begin{equation}
   \begin{split}
     f_{0,j} &= D^W(y_j), \quad f_{N_x,j}=D^E(y_j), \qquad j=0,\ldots,N_y, \\
     f_{i,0} &= D^S(x_i), \quad f_{j,N_y}=D^N(x_i), \qquad i=0,\ldots,N_x, \\
   \end{split}
 \end{equation}
 where $D^W, D^E, D^S, D^N$ are the values of $u$ at the 4 Dirichlet
 faces. As for the homogeneous Dirichlet BC, the \emph{restriction}
 operator should be changed as described in section
 \ref{sec:restrict_bc} while the \emph{prolongation} defined
 in (\ref{eq:2dprolongation}) remains unchanged.
 
 \subsection{Non-homogeneous Neumann Conditions}
 The non-homogeneous Neumann conditions at the 4 faces $x=0$ can be
 defined as
 \begin{equation}
   \begin{split}
   \left.\frac{\partial u}{\partial x}\right|_{x=0} &= N^W(y), \quad
   \left.\frac{\partial u}{\partial x}\right|_{x=L_x} = N^E(y), \\
   \left.\frac{\partial u}{\partial y}\right|_{y=0} &= N^S(x), \quad
   \left.\frac{\partial u}{\partial y}\right|_{y=L_y} = N^N(x). \\
   \end{split}
 \end{equation}
 Discretization of the BC defined above, using the \emph{central
 difference} yields on the 4 faces
 \begin{equation}
   \begin{split}
   u_{-1,j}   &= u_{1,j} -2h_xN^W(y_j), \quad
   u_{N_x+1,j} = u_{N_x-1,j} + 2h_xN^E(y_j), \qquad j=0,\ldots,N_y, \\
   u_{i,-1}   &= u_{i,1} -2h_yN^S(x_i), \quad
   u_{i,N_y+1} = u_{i,N_y-1} + 2h_yN^N(x_i), \qquad i=0,\ldots,N_x. \\
   \end{split}
 \end{equation}
 
 With these relations, the stencil (\ref{eq:new_stencil}) on the 4
 boundaries is modified as follow 
 
 \begin{equation}
   \begin{split}
   S^W &= \frac{1}{h_x^2}
   \begin{bmatrix}
      0 &\alpha^2(1+\tau^2/4)                  & 0 \\
      0 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{0,j} & 2 \\
      0 &\alpha^2(1+\tau^2/4)                  & 0 \\
   \end{bmatrix}
 ,\quad
   S^E = \frac{1}{h_x^2}
   \begin{bmatrix}
      0 &\alpha^2(1+\tau^2/4)                     & 0 \\
      2 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{N_x,j} & 0 \\
      0 &\alpha^2(1+\tau^2/4)                     & 0 \\
   \end{bmatrix}
 , \\
   S^S &= \frac{1}{h_x^2}
   \begin{bmatrix}
      0 &                2\alpha^2(1+\tau^2/4)  & 0 \\
      1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,0} & 1 \\
      0 & 0 & 0 \\
   \end{bmatrix}
 ,\quad   
   S^N  = \frac{1}{h_x^2}
   \begin{bmatrix}
      0 &  0  & 0 \\
      1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,N_y} & 1 \\
      0 &                2\alpha^2(1+\tau^2/4)    & 0 \\
   \end{bmatrix}
 , \\
   \end{split}
 \end{equation}
 while the right-hand-side  should be changed according to
 \begin{equation}
   \begin{split}
     f_{0,j} &\longleftarrow f_{0,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^W(y_{j-1})
   + N^W(y_j) - \frac{\tau\alpha}{4}\,N^W(y_{j+1})\right], \\
     f_{N_x,j} &\longleftarrow f_{N_x,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^E(y_{j-1})
   - N^E(y_j) - \frac{\tau\alpha}{4}\,N^E(y_{j+1})\right], \\
     f_{i,0} &\longleftarrow f_{i,0} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^S(x_{i-1})
   + (1+\tau^2/4)N^S(x_i) - \frac{\tau}{4\alpha}\,N^S(x_{i+1})\right], \\
     f_{i,N_y} &\longleftarrow f_{i,N_y} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^N(x_{i-1})
   - (1+\tau^2/4)N^N(x_i)  - \frac{\tau}{4\alpha}\,N^N(x_{i+1})\right]. \\
   \end{split}
 \end{equation}
 
 \subsection{The NNDD test problem}
 In order to test the discretization of the non-homogeneous boundary
 conditions as formulated above, a test problem with the prescribed
 \emph{exact} solution 
 \begin{equation}
   u(x,y) = 1 + \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
     \qquad \mbox{where $k_x$, $k_y$ are positive integers}
 \end{equation}
 and the following non-homogeneous boundary conditions
 
 \begin{equation}
   \begin{split}
     \left.\frac{\partial u}{\partial x}\right|_{x=0} =
     \left.\frac{\partial u}{\partial x}\right|_{x=L_x} &=
     k_x\sin\frac{2\pi k_yy}{L_y}, \\
     u(x,0) = u(x,L_y) &= 1, \\
   \end{split}
 \end{equation}
 
 is solved with varying grid spacing. The discretization errors versus
 the number of grid intervals $N_x$ displayed in Fig(\ref{fig:conv_nh_bc} shows
 a \emph{quadratic} convergence as expected from the second order
 finite differences used in both the PDE and the Neumann boundary condition
 discretization.
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/conv_nh_bc}
   \caption{Convergence of the error
     $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals in
     the $x$ direction $N_x$ for the non-homogeneous NNDD
     problem. Here, $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.}
   \label{fig:conv_nh_bc}
 \end{figure}
 
 As shown in Fig.(\ref{fig:nndd_nh}), the multigrid $V$-cycles for the
 \emph{non-homogeneous} problem converge with a slightly smaller efficiency,
 than the \emph{homogeneous} problem shown in Fig.(\ref{fig:weak_scal_NNDD}).
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_nh}
   \caption{Performances of the $V(3,3)$-cycle for the non-homogeneous
     NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc})
     are used here.}
   \label{fig:nndd_nh}
 \end{figure}
 
 \FloatBarrier
 
 \subsection{Local relaxation methods}
 In addition to the damped Jacobi, three methods of relaxations are
 added in this parallel multigrid solver:
 \begin{enumerate}
 \item The 4 color Gauss-Seidel method (RBGS).
 \item The Gauss-Seidel method (GS).
 \item The successive over-relaxation method (SOR).
 \end{enumerate}
 In order to apply correctly the parallel 4 color Gauss-Seidel, a
 complicated ghost cell exchange has to be performed for each sweep
 for each color. Here we simply apply the method \emph{locally} on each
 subdomain with only one ghost exchange performed at the beginning of
 each relaxation.
 
 The same procedure is also used for the other 2 methods which are
 inherently \emph{serial}. All these 3 relaxations are
 thus only correct if there is only one subdomain. As a 
 consequence, while the damped Jacobi does not depend on the
 partition of the subdomains, results from these 3 methods do depend on
 how the domain is partitioned.
 
 Table~\ref{tab:advrelax} show
 however that all of the 3 \emph{approximated} relaxation methods produce
 a much \emph{faster} convergence rate than the damped Jacobi relaxations for the NNDD test
 problem considered here. The performance of the implemented solver
 using the 4 relaxation methods on HELIOS is compared in
 Fig,(\ref{fig:weakhelios}. The bad performance of the 4 color
 Gauss-Seidel relaxations (RBGS) can be explained by the 4 nested loops
 required to sweep each of the 4 colors.
 
 \begin{table}[hbt]
 \centering
 \begin{tabular}{|l||r|r|r|r|r|r|}\hline
  Grid Sizes          & $256\times 1024$ & $512\times 2048$ & $1024\times 4096$ & $2048\times 8192$ & $4096\times 16384$ & $8192\times 32768$ \\
 \hline
  Process topology    &
  \multicolumn{1}{c|} {$1\times 1$} &
  \multicolumn{1}{c|} {$2\times 2$} &
  \multicolumn{1}{c|} {$4\times 4$} &
  \multicolumn{1}{c|} {$8\times 8$} &
  \multicolumn{1}{c|} {$16\times 16$} &
  \multicolumn{1}{c|} {$32\times 32$}  \\
 \hline
  Jacobi $\omega=0.9$ &     0.22 &     0.24 &      0.24 &      0.24 &       0.24 &       0.25 \\
  RBGS                &     0.05 &     0.07 &      0.10 &      0.10 &       0.12 &       0.12 \\
  GS                  &     0.07 &     0.08 &      0.10 &      0.11 &       0.11 &       0.12 \\
  SOR $\omega=1.2$    &     0.04 &     0.05 &      0.07 &      0.07 &       0.07 &       0.08 \\
 \hline
 \end{tabular}
 \caption{Reduction factor for the residuals (obtained as the \emph{geometric mean} of
   all its values except the first 2 values) for the non-homogeneous
   NNDD test problem. The same parameters as in Fig.(\ref{fig:conv_nh_bc}) are used here.}
 \label{tab:advrelax}
 \end{table}
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/weak_helios}
   \caption{Performance of the 4 relaxations on the non-homogeneous
     NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc})
     are used here. The grid sizes used in this \emph{weak scaling}
     run are shown in Table~\ref{tab:advrelax}.}
   \label{fig:weakhelios}
 \end{figure}
 
 \FloatBarrier
 
 \section{Performance of the Stencil Kernel on different platform}
 To get a feeling on the performances gained on the different
 platforms and how well the compilers (with their
 auto-vectorization capability) support these platforms, 
 the following Fortran \emph{9-point stencil} kernel has been used. The
 OpenMP directives are used for parallelization on both Xeon and Xeon
 Phi while offload to GPU card is done via the high level OpenACC
 directives. \emph{First touch} is applied in the initialization of
 \texttt{x} and \texttt{mat}.
 
 \begin{lstlisting}[language=Fortran,numbers=left,commentstyle=\color{blue},keywordstyle=\color{red},frame=single]
 !$omp parallel do private(ix,iy)
 !$acc parallel loop present(mat,x,y) private(ix,iy)
     DO iy=0,ny
        DO ix=0,nx
           y(ix,iy) =  mat(ix,iy,1)*x(ix-1,iy-1) &
                &    + mat(ix,iy,2)*x(ix,  iy-1) &
                &    + mat(ix,iy,3)*x(ix+1,iy-1) &
                &    + mat(ix,iy,4)*x(ix-1,iy)   &
                &    + mat(ix,iy,0)*x(ix,iy)     &
                &    + mat(ix,iy,5)*x(ix+1,iy)   &
                &    + mat(ix,iy,6)*x(ix-1,iy+1) &
                &    + mat(ix,iy,7)*x(ix,  iy+1) &
                &    + mat(ix,iy,8)*x(ix+1,iy+1)
        END DO
     END DO
 !$acc end parallel loop
 !$omp end parallel do
 \end{lstlisting}
 
 The performances on a Helios dual processor node and its attached Xeon
 Phi co-processor are shown in Fig.~\ref{fig:cpu_mic} while the
 performances on a Cray XC30 CPU and its attached NVIDIA graphics card are
 shown in Fig.~\ref{fig:cpu_gpu}. In these figures, Intel optimization
 flag \texttt{-O3} and default Cray optimization were applied. In
 Fig.~\ref{fig:cpu_mic_O1_O3}, the speedup by vectorization is shown by
 comparing performances obtained with \texttt{-O3} and \texttt{-O1}.
 Several observations can be drawn from these results.
 \begin{itemize}
 \item The parallel scaling, using OpenMP is linear for both Intel and
   Cray compilers, when the problem sizes fit into the 20MB cache of
   the Sandybridge processor. For grid sizes smaller than $32\times8$, the
   overhead of thread creation dominates. When the memory footprint is
   larger than the cache, 4 threads per socket already saturate the memory
   bandwidth.
 \item On the MIC, the parallel speedup scales linearly up to 60
   cores with 1 thread per core. Using 2 or 3 threads per core does not help
   while with 4 threads, the performance even degrades.
 \item The MIC, using the Intel \emph{mic native} mode, does not
   perform better than 8 cores of the Sandybridge processor.
 \item Since the benefit from \emph{vectorization} is quite large for
   the MIC (see Fig.~\ref{fig:cpu_mic_O1_O3}), the poor parallel
   scalability may be explained by the low flop intensity per thread
   coupled with the high overhead of the (many) thread creation and
   thread synchronization.
 \item The NIVIDIA card, using the high level OpenACC programming
   style is more than 3 times faster than 8 Sandybridge cores, for grid
   sizes larger than $1024\times256$. For smaller sizes, there are not
   enough flops to keep the GPU threads busy.
 \end{itemize}
 
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic}
   \caption{Performance on the Helios dual processor (left) using the
     \texttt{-O3} compiler option and on the MIC (right), 
     using the native mode \texttt{-mmic}.}
   \label{fig:cpu_mic}
 \end{figure}
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_gpu}
   \caption{Performance on a Cray XC30 single 8 core processor node
     (left) and the NVIDIA card (right) using OpenACC. Default Cray
     Fortran compiler optimization has been used on both runs.}
   \label{fig:cpu_gpu}
 \end{figure}
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic_O1_O3}
   \caption{Performance comparison between using \texttt{-O3} and
     \texttt{-O1}, on the Helios dual processor (left) and on the MIC (right).}
   \label{fig:cpu_mic_O1_O3}
 \end{figure}
 
 \FloatBarrier
 
 \section{Hybrid MPI+OpenMP \texttt{PARMG (r599})}
 In this version, a \emph{straightforward} parallelization is
 done in the subroutines \texttt{jacobi, residue, prolong, restrict} and
 \texttt{norm\_vec}, using the OpenMP work sharing directives. The
 ghost cell exchange is executed by the \emph{master} thread.
 All the 2D arrays (solutions, RHS, etc.) are allocated and initialized
 \emph{once} by the \emph{master} thread. Dynamic array allocations 
 during the multigrid $V$-cycles are thus avoided.
 
 To help further optimization, timings are introduced for each of the 4
 multigrid components \texttt{jacobi, residue, prolong, restrict} and
 the ghost cell \texttt{exchange} as well as on the \emph{recursive}
 subroutine \texttt{mg}. Since the timings of the 4 MG components
 include already calls to \texttt{exchange}, the time obtained for
 \texttt{mg} should be equal to the sum of the 4 MG components and 
 the \emph{extras} time which includes operations in \texttt{mg} but
 not in the 4 components:
 \begin{equation}
   \label{eq:timings}
   t_\text{mg} = t_\text{jacobi} + t_\text{residue} + t_\text{prolong}
   + t_\text{restrict} + t_\text{extras}.
 \end{equation}
 We will see in the following sections that, in addition to these 5
 contributions to $t_\text{mg}$,
 there is \emph{overhead} probably due to the \emph{recursive}
 calls of \texttt{mg}.
 
 \subsection{Parallel efficiency on single node}
 The comparison in Fig.~\ref{fig:single_node} shows that the pure
 OpenMP version is at most $30\%$ slower than the pure MPI version when all the
 16 cores are used but less than $10\%$ when only one socket is
 used. The degradation of the OpenMP version can be explained by the
 \emph{numa} effects when 2 sockets are used. It is also observed
 that the performance level off at 4 cores, due to the
 saturation of the socket memory bandwidth.
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/single_node}
   \caption{Parallel performance of the 7 level $V(3,3)$-cycle on a
     dual socket Helios node ($2\times 8$ cores) for pure OpenMP and pure
     MPI. The non-homogeneous NNDD problem with the same parameters in
     Fig.~\ref{fig:conv_nh_bc} is considered here. The OpenMP threads and MPI
     tasks are placed first on the first socket before filling the
     second socket, using the \texttt{srun} option
     ``\texttt{--cpu\_bind=cores -m block:block}'' and the environment
     variable \texttt{OMP\_PROC\_BIND=true}.}
   \label{fig:single_node}
 \end{figure}
 
 \subsection{Hybrid efficiency on multi-nodes}
 In the following multi-node experiments, all the 16 cores on each
 Helios node are
 utilized. The numbers of OpenMP threads \emph{per} MPI \emph{process} NT, the
 number of MPI processes \emph{per node} NP, the number of nodes NNODES
 and the \emph{total} number of MPI processes $\text{NP}_{tot}$ verify
 thus the following relations:
 \begin{equation}
   \begin{gathered}
     1\le\text{NT} \le 16, \qquad 1\le\text{NP} \le 16 \\
     \text{NT}\times\text{NP} = 16 \\
     \text{NP}_{tot} = 16\times\text{NNODES}/\text{NT} \\
   \end{gathered}
 \end{equation}
 
 The times of the different MG components and the relative
 contributions for the \emph{strong scaling}
 experiments using a $1024\times 4096$ grid size, are shown in 
 Fig.~\ref{fig:hybrid_strong} and Fig.~\ref{fig:hybrid_strong_contrib}
 respectively. The following observations can be made:  
 \begin{enumerate}
   \item The \texttt{exchange} time increases strongly with increasing
   NNODES, due to smaller partitioned subdomains and thus their larger
   surface/volume ratio.
   \item The pure MPI (NT=1) \texttt{exchange} time is on the other
     hand reduced with
   $\text{NT}>1$ since the local partitioned grid becomes larger.
   \item The less efficient OpenMP parallelization (numa effects,
   Amdahl's law) tends to limit however this advantage.
   \item As a result, there is an optimal NT for a given NNODES:
     2 for 4 and 16 nodes, 8 for 64 nodes.
   \item The \texttt{jacobi} and \texttt{residue} contributions dominate
     largely with $0.63\le t_\text{jacobi}/t_\text{mg}\le 0.83$ and
     $0.09\le t_\text{residue}/t_\text{mg}\le 0.18$.
   \item The \emph{overhead} (see Eq.~\ref{eq:timings}) times increase
     with NNODES but decrease slightly for increasing NT.
 \end{enumerate}
 
 The times of the different MG components and the relative
 contributions for the \emph{weak scaling}
 experiments are shown in Fig.~\ref{fig:hybrid_weak} and
 Fig.~\ref{fig:hybrid_weak_contrib} respectively. The following
 observations can be made: 
 \begin{enumerate}
 \item A steady increase of MG times with the number of nodes can
   be attributed to the increase of ghost cells \texttt{exchange} time,
   even though the amount of communications between nodes does not
   change.
 \item The MG performance is improved slightly when NT=2 but drop
   drastically for NT=16. This seems to indicate that
   \emph{numa} effects are important here, since the array initialization
   is not done locally on each thread.
 \item The \emph{overhead} (see Eq.~\ref{eq:timings}) times are much
   smaller than in the \emph{strong scaling} runs.
 \end{enumerate}
 
 \begin{sidewaysfigure}
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong}
   \caption{Detailed timings for strong scaling experiments using the same
     problem parameters as in Fig.~\ref{fig:single_node}, except that 5
     levels are chosen to be able to run the runs with 64 nodes.}
   \label{fig:hybrid_strong}
 \end{sidewaysfigure}
 
 \begin{sidewaysfigure}
 %\begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong_contrib}
   \caption{Relative contributions of each of the MG components for the
     strong scaling experiments using the same
     problem parameters as in Fig.~\ref{fig:single_node}.}
   \label{fig:hybrid_strong_contrib}
 %\end{figure}
 \end{sidewaysfigure}
 
 \begin{sidewaysfigure}
 %\begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak}
   \caption{Detailed timings for weak  scaling experiments using the same
     problem parameters as in Fig.~\ref{fig:single_node}.}
   \label{fig:hybrid_weak}
 %\end{figure}
 \end{sidewaysfigure}
 
 \begin{sidewaysfigure}
 %\begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak_contrib}
   \caption{Relative contributions of each of the MG components for the
     weak scaling experiments using the same
     problem parameters as in Fig.~\ref{fig:single_node}.} 
   \label{fig:hybrid_weak_contrib}
 %\end{figure}
 \end{sidewaysfigure}
 
 Finally, Table~\ref{tab:memory} shows that using $\text{NT}>1$
 decreases the memory needed by the multigrid procedure for both strong
 and weak scaling runs.
 
 \begin{table}[htb]
 \begin{center}
 \begin{tabular}{|l|c||r|r|r|r|r|}
 \hline
  &\texttt{\textbf{NNODES}}& NT=1 & NT=2 & NT=4 & NT=8 & NT=16  \\
 \hline
 \multirow{4}{*}{\texttt{\textbf{Strong Scaling}}}
 &1    & 57.01 & 53.70 & 52.04 & 51.06 & 48.57  \\
 &4   & 21.87 & 21.49 & 15.75 & 13.87 & 13.11  \\
 &16  & 13.82 & 8.52 & 5.75 & 5.69 & 4.00  \\
 &64  & 13.93 & 6.80 & 3.73 & 2.32 & 1.48  \\
 \hline\hline
 \multirow{4}{*}{\texttt{\textbf{Weak Scaling}}}
 &1   & 57.03 & 53.71 & 52.04 & 51.08 & 48.61 \\
 &4   & 59.24 & 59.18 & 53.39 & 51.52 & 48.69 \\
 &16  & 60.48 & 55.33 & 52.69 & 52.74 & 49.06 \\
 &64  & 63.30 & 56.13 & 53.30 & 51.58 & 48.79 \\
 \hline
 \end{tabular}
 \end{center}
   \caption{Memory footprint \emph{per core} (MB/core) for the strong scaling and weak
     scaling experiments.}
   \label{tab:memory}
 \end{table}
 
 \subsection{Summary and conclusions}
 The \emph{strong scaling} and  \emph{weak scaling} wrt NT and NNODES
 are summarized in  Fig.~\ref{fig:scaling}. The speed up for the strong
 scaling experiments shows a good efficiency up to 16 nodes
 for all NT but degrades at 64 nodes (1024 cores) due the partitioned
 grid becoming too small. A good \emph{weak scaling} is also
 obtained with an increase in $t_\text{mg}$ of
 less than $10\%$ when NNODES vary from 4 to 64. However, for NT=16, the
 efficiency drops significantly, due to the non-local memory access
 when the OpenMP threads are placed on both sockets (\emph{numa} effect).
 
 In order to improve the hybrid MPI+OpenMP multigrid, especially for large number
 of threads per MPI process NT, the following optimizations should be done:
 \begin{itemize}
   \item \emph{First touch} array initialization in order to avoid
     \emph{numa} effects.
   \item OpenMP parallelization of some remaining \emph{serial} loops.
   \item Better vectorization of inner loops.
 \end{itemize}
 The outcome of these optimization steps is important in order to run
 efficiently on upcoming \emph{multicore} processors and
 \emph{manycore} (MIC) devices.
 
 \begin{figure}[htb]
   \centering
   \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/scaling}
   \caption{Strong scaling with a $1024\times 4096$ grid size (left) and
   weak scaling (right) with grid sizes $1024\times 4096,10248\times 8192,
   4096\times 16384$ and $8192\times32768$ respectively for 1, 4, 16
   and 64 nodes.}
   \label{fig:scaling}
 \end{figure}
 
 \FloatBarrier
 
 \pagebreak
 \begin{thebibliography}{99}
   \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
     Multigrid Tutorial, Second Edition, SIAM (2000)}.
   \bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}.
   \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ 
     \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf}
   \bibitem{TEMPL} {R. Barrett, M. Berry, T. F. Chan,
     J. Demmel, J. Donato, J. Dongarra, V. Eijkhout,
     R. Pozo, C. Romine and H. Van der Vorst,
     Templates for the Solution of Linear Systems: Building Blocks for
     Iterative Methods, 2nd Edition , SIAM, (1994)}.
     \bibitem{Wesseling} {P.~Wesseling, An Introduction to Multigrid
       Methods, Edwards, 2004}.
     \bibitem{salpha} {X. Lapillonne, S. Brunner, T. Dannert, S. Jolliet,
       A. Marinoni et al., Phys. Plasmas 16, 032308 (2009)}.
 \end{thebibliography}
 
 \end{document}
diff --git a/multigrid/docs/multigrid.tex b/multigrid/docs/multigrid.tex
index a8f694d..4af4d6b 100644
--- a/multigrid/docs/multigrid.tex
+++ b/multigrid/docs/multigrid.tex
@@ -1,949 +1,949 @@
 %
 % @file multigrid.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{linuxdoc-sgml}
 \usepackage{graphicx}
 \usepackage{hyperref}
 \usepackage{amsmath}
 %\usepackage{verbatim}
 %\usepackage[notref]{showkeys}
 
 \title{\tt Multigrid Formulation for Finite Elements.}
 \author{Trach-Minh Tran, Stephan Brunner}
 \date{v0.2, October 2012}
 \abstract{A multigrid formulation for finite elements is
   derived, using variational principles. More specifically the grid
   transfer operators will be derived and tested in 1D Cartesian,
   cylindrical and spherical geometry for arbitrary order B-Splines.}
 
 \begin{document}
 \maketitle
 \tableofcontents
 
 \section{The discretized problem}
 Consider the one-dimensional linear integro-differential problem
 \begin{equation}
 \label{eq:oned_prob}
   \mathcal{L}(u) = f, \qquad 0\le x\le L,
 \end{equation}
 with suitable boundary conditions. On an \emph{equidistant} mesh with
 interval $h=L/N$ and using the \emph{weak form} of Eq.~(\ref{eq:oned_prob}),
 the linear system to be solved on this grid (which will be
 referred as the \emph{fine} grid) can be written as (see
 \cite{SOLVERS}, \cite{BSPLINES}):
   \begin{equation}
     \label{eq:fine}
     \sum_{i'=1}^{N+p}A_{ii'}^hu^h_{i'} = b^h_i, \qquad 
       A^h_{ii'}=\int_{0}^{L}\Lambda^h_i
       \mathcal{L}(\Lambda^h_{i'})\,x^{\alpha}dx, \qquad
       b^h_i = \int_{0}^{L}f\Lambda^h_i\,x^{\alpha}dx,
   \end{equation}
 where $p$ is the order the Splines $\Lambda^h_i$ and
 $\alpha=0,1,2$ for Cartesian, cylindrical and spherical coordinates
 respectively. It should be noted that the unknowns $u^h_i$ of this
 linear system are the \emph{expansion coefficients} of the discretized
 solution of the problem $u^h(x)$
 \begin{equation}
   u^h(x) = \sum_{i'=1}^{N+p}u^h_{i'}\Lambda^h_{i'}(x)
 \end{equation}
 and the right hand sides $b^h_i$ are defined as the \emph{projection} of
 $f(x)$ on the same basis functions, in contrast with the Finite
 Differences (FD) or Finite Volume (FV) formulations where $u^h_i$ and
 $b^h_i$ are the \emph{nodal values} of $u$ and $f$.
 
 On the \emph{coarser} mesh with interval $2h=2L/N$, the discretized
 linear system can be written as
   \begin{equation}
     \label{eq:coarse}
     \sum_{i'=1}^{N/2+p}A_{ii'}^{2h}u^{2h}_{i'} = b^{2h}_i, \qquad 
       A^{2h}_{ii'}=\int_{0}^{L}\Lambda^{2h}_i
       \mathcal{L}(\Lambda^{2h}_{i'})\,x^{\alpha}dx, \qquad
       b^{2h}_i = \int_{0}^{L}f\Lambda^{2h}_i\,x^{\alpha}dx.
   \end{equation}
 
 \section{Transfer operators}
 \label{sec:twogrid}
 For simplicity let consider the two-grid procedure \cite{Briggs} which can be
 summarized as follow:
 \begin{enumerate}
 \item Obtain an approximation $\mathbf{u}^h$ on the \emph{fine} grid, using a
   Gauss-Seidel (GS) or a weighted Jacobi scheme. This procedure is
   also called \emph{smoothing} or \emph{relaxation}.
 \item Compute the \emph{residuals}:
     $\mathbf{r}^h=\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h$. 
 \item Obtain the residuals on the coarse mesh $\mathbf{r^{2h}}$ by
   \emph{restriction} of $\mathbf{r}^h$.
 \item Direct solve $\mathbf{A}^{2h}\mathbf{e}^{2h}=\mathbf{r^{2h}}$ to obtain
   the error $\mathbf{e}^{2h}=\mathbf{u}-\mathbf{u}^{2h}$
 \item Interpolate (\emph{prolong}) the error to obtain $\mathbf{e}^h$.
 \item Correct the approximation obtained on the fine grid: 
    $\mathbf{u}^h\leftarrow \mathbf{u}^h+\mathbf{e}^h$.
 \item Relax on $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{f^{h}}$, using
   the previously computed $\mathbf{u}^h$ as a guess.
 \end{enumerate}
 
 Steps 3 and 5 are called \emph{grid transfers} and are detailed in the
 following. It should be noted that the fine to coarse transfer
 (restriction) applies to the right hand side $\mathbf{b}^h$ while the
 prolongation applies to the expansion coefficients $\mathbf{u}^{2h}$.
 
 \subsection{Fine to coarse grid transfer (restriction)}
 The right hand side on the fine and coarse grid can be written as
 \begin{equation*}
   \begin{split}
     b^{h}_i &= \int_0^Lf\Lambda^{h}_i\,x^{\alpha}dx = 
       \sum_{i'=1}^{N+p}f^h_{i'}\underbrace{\int_0^L\Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_
          {M^{h,h}_{ii'}}, \\
     b^{2h}_i &= \int_0^Lf\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}f^h_{i'}
     \underbrace{\int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_{M^{2h,h}_{ii'}} \\
   \end{split}
 \end{equation*}
 where the expansion $f(x)=\sum_{i=1}^{N+p}f^h_i\Lambda^h_i(x)$ has been used.
 Elimination of  $\mathbf{f^h}$ leads to the definition of the
 \emph{restriction} matrix:
 \begin{equation}
 \label{eq:restriction}
   \mathbf{b}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{b}^h, 
   \qquad \boxed{\mathbf{R}^{2h}_{h}=\mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1}}.
 \end{equation}
 
 Note that the computation of the \emph{mass matrices}
 $\mathbf{M}^{h,h}$ and $\mathbf{M}^{2h,h}$ can be done \emph{exactly}
 using a Gauss integration with $N_G=\lceil p+(\alpha+1)/2 \rceil$ points.
 
 Another way to derive the restriction operator $\mathbf{R}^{2h}_{h}$
 is by noting that the basis functions $\Lambda^{2h}_{i}$ are
 \emph{piecewise} $C^{p-1}_h$ \emph{polynomials} with \emph{breaks} on
 the fine grid points $x_i=ih$,  and thus can be expressed
 \emph{uniquely} as 
 \begin{equation}
   \Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}c_{ii'}\Lambda^{h}_{i'}(x),
   \quad i=1\ldots N/2+p.
 \end{equation}
 Projecting this equation on the basis $\Lambda^{h}_j$ then leads to
 \begin{equation*}
   \begin{split}
     \sum_{i'=1}^{N+p}c_{ii'}\int_0^L\Lambda^{h}_{i'}\Lambda^{h}_{j}\,x^{\alpha}dx
     &= \int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{j}\,x^{\alpha}dx, \qquad
     i=1\ldots N/2+p, \quad j=1,\ldots N+p \\
     \Longrightarrow \mathbf{c}\cdot\mathbf{M}^{h,h} &=
     \mathbf{M}^{2h,h} \Longrightarrow \mathbf{c} =
     \mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1} = \mathbf{R}^{2h}_{h} \\
   \end{split}
 \end{equation*}
 and finally
 \begin{equation}
 \label{eq:restrict_gen}
   \boxed{\Lambda^{2h}_{i}(x) =
       \sum_{i'=1}^{N+p}\left(\mathbf{R}^{2h}_{h}\right)_{ii'}\Lambda^{h}_{i'}(x), 
   \quad i=1\ldots N/2+p}
 \end{equation}
 Because the expansion coefficients $c_{ii'}$ of $\Lambda^{2h}_{i}(x)$
 (rows of the restriction matrix $\mathbf{R}^{2h}_{h}$) on the
 fine mesh basis are \emph{unique},  $\mathbf{R}^{2h}_{h}$ should be
 independent of the geometry exponent $\alpha$ or more generally, of the
 definition of the \emph{projection} (or scalar product) used to
 calculate the restriction matrix. Furthermore, since the supports of
 both $\Lambda^{h}_i$ and  $\Lambda^{2h}_i$ are \emph{compact}, the
 matrix $\mathbf{R}^{2h}_{h}$ should be \emph{sparse}.
 
 One can show that, using (\ref{eq:restrict_gen}), the
 restriction of the fine mesh FE matrix $\mathbf{A}^h$ is given by 
 \begin{equation}
 \label{eq_coarse_mat}
   \mathbf{A}^{2h} = \mathbf{R}^{2h}_h\mathbf{A}^{h}\left(\mathbf{R}^{2h}_h\right)^{T}.
 \end{equation}
 
 \subsection{Coarse to fine grid transfer (prolongation)}
 Let denote the discretized solution on the coarse mesh of
 $\mathbf{A}^{2h}\mathbf{u}^{2h}=\mathbf{R}^{2h}_{h}\mathbf{b}^h$
 by
 \begin{equation*}
   u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x),
 \end{equation*}
 and seek for an approximated solution on the fine mesh $\mathbf{u}^{h}$
 \begin{equation*}
   u^{h}(x) = \sum_{i=1}^{N+p}u^{h}_{i}\Lambda^{h}_{i}(x).
 \end{equation*}
 by \emph{prolongation} of $\mathbf{u}^{2h}$ (instead of solving
 $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{b}^h$). A reasonable solution is 
 to \emph{minimize} the square of the error norm defined as
 \begin{equation*}
   \begin{split}
     \epsilon^2 &= \|u^{h}(x)-u^{2h}(x)\|^2 \equiv \int_0^L
     [u^{h}(x)-u^{2h}(x)]^2\,x^\alpha dx, \\
     \frac{\partial\epsilon^2}{\partial u^h_i}   &=0 \Longrightarrow
     \sum_{i'=1}^{N+p}u^{h}_{i}\int_0^L
     \Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^\alpha dx =
     \sum_{i'=1}^{N/2+p}u^{2h}_{i}\int_0^L
     \Lambda^{h}_{i}\Lambda^{2h}_{i'}\,x^\alpha dx. \\
   \end{split}
 \end{equation*}
 
 This yields the prolonged (or interpolated) \emph{coarse grid}
   solution on the \emph{fine grid}
 \begin{equation}
 \label{eq:prolongation}
   \mathbf{u}^h = \mathbf{P}^h_{2h}\mathbf{u}^{2h}, \qquad
   \boxed{\mathbf{P}^h_{2h} =
     (\mathbf{M}^{h,h})^{-1}\mathbf{M}^{h,2h}=(\mathbf{R}^{2h}_{h})^T}
 \end{equation}
 and the coarse FE matrix can be finally expressed as\begin{equation}
   \label{eq:coarse_mat}
   \boxed{\mathbf{A}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{A}^{h}\mathbf{P}^h_{2h}}
 \end{equation}
 
 \subsection{An alternative derivation of grid transfer operators}
 Starting from the inter grid transformation of the basis functions
 Eq.(\ref{eq:restrict_gen}), the
 restriction of $\mathbf{b}^h$ and the prolongation of
 $\mathbf{u}^{2h}$ can be derived as follow
 \begin{gather*}
   b^{2h}_i = \int_0^L f\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}
     \left(\mathbf{R}^{2h}_{h}\right)_{ii'}\int_0^Lf\Lambda^{h}_{i'}\,x^{\alpha}dx
       =\sum_{i'=1}^{N+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'}b^h_{i'}, \\
       u^{2h}(x) = \sum_{i=1}^{N/2+p} u^{2h}_{i} \Lambda^{2h}_{i}=
   \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p}
   \left(\mathbf{R}^{2h}_{h}\right)_{ii'}
   u^{2h}_{i}\right]}_{u^h_{i'}}\Lambda^{h}_{i'}(x) \Longrightarrow 
   \mathbf{u}^h = \left(\mathbf{R}^{2h}_{h}\right)^T\mathbf{u}^{2h} =
   \mathbf{P}^h_{2h}\mathbf{u}^{2h}.\\ 
 \end{gather*}
 
 \section{Numerical results for the transfer operators}
 The prolongation matrix as defined in Eq.~(\ref{eq:prolongation}) was
 calculated using the BSPLINES module. A Gauss integration with
 $N_G=\lceil p+(\alpha+1)/2 \rceil$ points is used to carry out the
 numerical integrations.
 In the following, the results are presented
 for linear, quadratic and cubic Splines. Since the
 restriction matrix is just the transpose of the prolongation matrix,
 only the latter is shown. As expected, all the obtained matrices
 are found to be \emph{independent} of $\alpha$ and \emph{sparse}.
 
 During the calculations, it was checked that
 \begin{itemize}
 \item The coarse matrix computed using Eq.~(\ref{eq:coarse_mat}) and
   the transfer matrix, is identical to the matrix assembled directly
   on the coarse grid. 
 \item The sum of each row of the prolongation matrix is 1, since a
   constant function ($\mathbf{u}^{2h}=1$) should remain constant after
   the grid transfer.
 
 \end{itemize}
 
 \subsection{Linear Splines}
 For $N=8$, the prolongation is a $9\times 5$ matrix given by
 \begin{equation}
   \mathbf{P}^{h}_{2h} =
   \left(
   \begin{matrix}
     1   & 0   & 0   & 0   & 0  \\
     1/2 & 1/2 & 0   & 0   & 0  \\
     0   & 1   & 0   & 0   & 0  \\
     0   & 1/2 & 1/2 & 0   & 0  \\
     0   & 0   & 1   & 0   & 0  \\
     0   & 0   & 1/2 & 1/2 & 0  \\
     0   & 0   & 0   & 1   & 0  \\
     0   & 0   & 0   & 1/2 & 1/2\\
     0   & 0   & 0   & 0   & 1   \\
   \end{matrix}\right)
 \end{equation}
 As expected, the prolongation matrix for linear Splines is
 identical to the one obtained for first order FD discretization, where
 a linear interpolation is used. One can easily check that
 \begin{equation*}
   \begin{split}
     \Lambda^{2h}_1(x) &= \Lambda^h_1(x) + \frac{1}{2}\Lambda^h_2(x), \\
     \Lambda^{2h}_2(x) &= \frac{1}{2}\Lambda^h_2(x) + \Lambda^h_3(x) +
     \frac{1}{2}\Lambda^h_4(x), \\ 
   \end{split}
 \end{equation*}
 as expected from (\ref{eq:restrict_gen}).
 
 \subsection{Quadratic Splines}
 For $N=8$, the prolongation is a $10\times 6$ matrix given by
 \begin{equation}
   \mathbf{P}^{h}_{2h} =
   \left(
   \begin{matrix}
     1   & 0   & 0   & 0   & 0   & 0  \\
     1/2 & 1/2 & 0   & 0   & 0   & 0  \\
     0   & 3/4 & 1/4 & 0   & 0   & 0  \\
     0   & 1/4 & 3/4 & 0   & 0   & 0  \\
     0   & 0   & 3/4 & 1/4 & 0   & 0  \\
     0   & 0   & 1/4 & 3/4 & 0   & 0  \\
     0   & 0   & 0   & 3/4 & 1/4 & 0  \\
     0   & 0   & 0   & 1/4 & 3/4 & 0  \\
     0   & 0   & 0   & 0   & 1/2 & 1/2\\
     0   & 0   & 0   & 0   & 0   & 1  \\
   \end{matrix}\right)
 \end{equation}
 
 \subsection{Cubic Splines}
 For $N=10$, the prolongation is a $13\times 8$ matrix given by
 \begin{equation}
   \mathbf{P}^{h}_{2h} =
   \left(
   \begin{matrix}
     1   & 0    & 0     & 0   & 0   & 0     & 0    & 0  \\
     1/2 & 1/2  & 0     & 0   & 0   & 0     & 0    & 0  \\
     0   & 3/4  & 1/4   & 0   & 0   & 0     & 0    & 0  \\
     0   & 3/16 & 11/16 & 1/8 & 0   & 0     & 0    & 0  \\
     0   & 0    & 1/2   & 1/2 & 0   & 0     & 0    & 0  \\
     0   & 0    & 1/8   & 3/4 & 1/8 & 0     & 0    & 0  \\
     0   & 0    & 0     & 1/2 & 1/2 & 0     & 0    & 0  \\
     0   & 0    & 0     & 1/8 & 3/4 & 1/8   & 0    & 0  \\
     0   & 0    & 0     & 0   & 1/2 & 1/2   & 0    & 0  \\
     0   & 0    & 0     & 0   & 1/8 & 11/16 & 3/16 & 0  \\
     0   & 0    & 0     & 0   & 0   & 1/4   & 3/4  & 0  \\
     0   & 0    & 0     & 0   & 0   & 0     & 1/2  & 1/2\\
     0   & 0    & 0     & 0   & 0   & 0     & 0    & 1  \\
   \end{matrix}\right)
 \end{equation}
 
 Note that from the results shown above, it is straightforward to derive the
 prolongation matrix for other number of intervals $N$.
 
 \section{Practical Considerations}
 \subsection{Boundary conditions}
 The \emph{essential Dirichlet boundary conditions} are imposed by zeroing the column
 and row (first column and first row for the left boundary and last column
 and last row for the right boundary) of the FE matrix $\mathbf{A}^h$ and
 putting 1 on the diagonal. The same operation should be also performed
 on the prolongation matrix, preserving thus the relation
 (\ref{eq:coarse_mat}). For non-homogeneous Dirichlet boundary
 conditions, the elements of the column should be saved before the
 zeroing operation (for example $A^h_{21}, A^h_{31}, \ldots$ for the left
 boundary condition). They will be used later to modify the right hand
 side:
 \begin{equation*}
   b^h_i \leftarrow b^h_i-A^h_{i1}u^h_1, \quad i=2,\ldots
 \end{equation*}
 
 Nothing has to be done for \emph{natural boundary conditions}.
 
 \subsection{Residual norm and error}
 The residual norm is simply defined as the Euclidean norm of the
 residue:
 
 \begin{equation}
 \label{eq:resid}
 \|r\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2 =
 \sqrt{\sum_i\left(b_i-\sum_{i'}A_{ii'}u_{i'}\right)^2}.
 \end{equation}
 When the \emph{exact} solution $u(x)$ is known, the \emph{discretization error}
 can defined as
 \begin{equation}
 \label{eq:discerr}
   \|e\|_2 = \sqrt{\int x^\alpha dx\left[\sum_{i}u_{i}\Lambda_i(x)-u(x)\right]^2}
 \end{equation}
 and computed using a Gauss quadrature. Note that for Splines of order
 $p$, $\|e\|_2(h)$ converges to zero as $O(h^{p+1})$. 
 
 \section{The Model Problems}
 \subsection{Cartesian geometry}
 The following second-order boundary value problem is considered:
 \begin{equation}
 \label{eq:cartesian_problem}
   \begin{split}
   -\frac{d^2}{dx^2} u(x) + \sigma u(x) &= \sin (\pi kx), \qquad 0\le x\le 1 \\
     u(0)=u(1) &= 0 \\
     \Rightarrow u(x) = \frac{\sin(\pi kx)}{\pi^2k^2+\sigma}.& \\
   \end{split}
 \end{equation}
 Using the weak form, the FE discretized matrix and right hand side can
 be computed as 
 \begin{equation}
   A_{ii'} = \int_0^1dx\left[\Lambda'_i(x)\Lambda'_{i'}(x) +
     \sigma\Lambda_i(x)\Lambda_{i'}(x)\right], \qquad
   b_i =  \int_0^1dx \sin (\pi kx)\Lambda_i(x).
 \end{equation}
 For Splines of order $p$, the integration is done with a $\lceil
 p+1/2\rceil$ point Gauss quadrature which is \emph{exact}
 for the matrix $\mathbf{A}$ if $\sigma$ is constant.
 
 The boundary conditions are simply imposed by setting
 \begin{equation*}
   A_{ki}=A_{ik}=\delta_{ik} \qquad\mbox{and} \qquad b_k=0
 \end{equation*}
 for $k=1$ (the first equation) and $N+p$ (the last equation).
 
 \subsection{Cylindrical geometry}
 The following second-order boundary value problem is considered:
 \begin{equation}
   \begin{split}
   -\frac{1}{r}\frac{d}{dr}r\frac{d}{dr}u(r) + \frac{m^2}{r^2}u(r) &= 
   j^2_{ms}J_{m}(j_{ms}r), \qquad 0\le r\le 1, \quad j_{ms} =
   s^{th}\mbox{ zero of }J_{m}, \\
     u(1) &= 0 \\
     \Rightarrow u(r) = J_{m}(j_{ms}r).& \\
   \end{split}
 \end{equation}
 Using the weak form, the FE discretized matrix and right hand side can
 be computed as 
 \begin{equation}
   A_{ii'} = \int_0^1rdr\left[\Lambda'_i(r)\Lambda'_{i'}(r) +
     \frac{m^2}{r^2}\Lambda_i(r)\Lambda_{i'}(r)\right], \qquad
   b_i =  \int_0^1rdr j^2_{ms}\,J_{m}(j_{ms}r)\Lambda_i(r).
 \end{equation}
 The boundary condition has only to be imposed on the last equation,
 using the same procedure described for the Cartesian geometry.
 
 It should be noted here that for $m\neq 0$, the matrix elements
 $A_{1i}$ and $A_{i1}$ \emph{diverge} since $\Lambda_1(r)$ is not
 equal to zero at $r=0$. However, using a \emph{direct solver}, one can observe
 that the resulting \emph{discretization errors} as defined by
 Eq.(\ref{eq:discerr}) converge for number of Gauss points $N_G$ slightly
 larger than $p+1$, as shown in Table~\ref{tab:gauss_conv}. Then, using
 $N_G=4$ and $6$ for the linear and cubic splines respectively, the
 discretization error as a function of the number of grid intervals
 (Fig~\ref{fig:cyl_conv}) show the expected quadratic and quartic
 scaling respectively for the linear and cubic Splines.
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=0.8\hsize]{cyl_conv}
   \caption{Discretization errors $\|e\|_2$ obtained by a \emph{direct
       solver} versus the number of grid intervals $N$. A linear fit
     yields a quadratic scaling ($\sim N^{-2.0}$) for the linear Splines
   and a quartic convergence ($\sim N^{-4.3}$) for the cubic Splines.}
   \label{fig:cyl_conv}
 \end{figure}
 
 \begin{table}
   \centering
   \begin{tabular}{|c|c|c|}\hline
 Number of Gauss points & $p=1$     & $p=3$     \\ \hline
   2                    & 8.319E-04 &           \\
   4                    & 9.277E-04 & 5.799E-07 \\
   6                    & 9.276E-04 & 5.936E-07 \\
   8                    & 9.276E-04 & 5.936E-07 \\\hline
   \end{tabular}
   \caption{Convergence of the \emph{discretization error} with respect
   to the number of Gauss points for the cylindrical problem with $m=1$,
   $s=10$ on a $128$ interval grid.}
   \label{tab:gauss_conv}
 \end{table}
 
 \section{The Multigrid Schemes}
 The two grid procedure described in section (\ref{sec:twogrid}) can be
 generalized as follow.
 Let $\nu_1$, $\nu_2$ and $\mu$ be three iteration parameters. 
 Given a guess $\mathbf{u}^h$ and right hand side $\mathbf{b}^h$ at the
 \emph{finest} level, a MG cycle represented by 
 \begin{equation*}
   \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)}
 \end{equation*}
 will compute a \emph{new} $\mathbf{u}^h$ and is defined recursively by the
 following steps: 
 
 \begin{enumerate}
 \item If $h$ is the coarsest mesh size, direct solve
   $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return.
   \item Else 
     \begin{itemize}
     \item Relax $\mathbf{u}^h$ $\nu_1$ times.
     \item $\mathbf{b}^{2h} \leftarrow
       \mathbf{R}^{2h}_h(\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h), \quad
       \mathbf{u}^{2h}\leftarrow 0$.
     \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{b}^{2h})$ $\mu$ times.
     \item $\mathbf{u}^h\leftarrow
       \mathbf{u}^h+\mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$.
     \item Relax $\mathbf{u}^h$ $\nu_2$ times.
     \end{itemize} 
 \end{enumerate}
 
 The standard $V$-cycle is obtained for $\mu=1$ while $\mu=2$ results in
 the $W$-cycle. Usually the number of \emph{pre-smooth} and
 \emph{post-smooth} sweeps $\nu_1$ and $\nu_2$ is limited to 1 or 2. In the
 following a $V$-cycle will be denoted by $V(\nu_1,\nu_2)$.
 
 Another multigrid algorithm called \emph{Full Multigrid} or FMG does
 not require an input guess $\mathbf{u}^h$ but solves first the
 problem on coarser grids and uses one or many MG cycles to obtain the problem
 solution. It can be represented by
 \begin{equation*}
   \boxed{\mathbf{u}^h \leftarrow FMG^h(\mathbf{b}^h)}
 \end{equation*}
 and defined recursively by the following steps:
 
 \begin{enumerate}
 \item If $h$ is the coarsest mesh size, direct solve
   $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return.
   \item Else 
     \begin{itemize}
     \item $\mathbf{b}^{2h} \leftarrow  \mathbf{R}^{2h}_h(\mathbf{b}^h)$.
     \item $\mathbf{u}^{2h} \leftarrow FMG^{2h}(\mathbf{b}^{2h})$.
     \item $\mathbf{u}^h \leftarrow \mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$.
     \item $\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)$ $\nu_0$ times.
     \end{itemize} 
 \end{enumerate}
 
 Note that while the MG process is an iterative process (started by
 setting for example the initial guess $\mathbf{u}^h=0$), the FMG is
 more like a \emph{direct solver} with appropriate values of $\nu_1$,
 $\nu_2$ and  $\nu_0$ determined experimentally.
 
 \section{Numerical Experiments}
 The residual norm $\|r\|_2$ and error $\|e\|_2$ defined
 previously are reported after each $V$-cycle in
 Table~\ref{tab:cartesian1} for the Cartesian model problem and in
 Table~\ref{tab:cylindrical1} for the cylindrical one . The ratio
 between successive cycle $\|r\|_2$ 
 and $\|e\|_2$ are shown in columns labeled \emph{ratio} and measure
 the rate of iteration convergence. The \emph{asymptotic} ratio of
 $\|r\|_2$ is called the \emph{convergence factor}.
 
 In all the cases shown, one can note that $\|e\|_2$ level off quickly to the
 discretization error obtained by using the \emph{direct solver} on the
 finest grid, while the residual norms $\|r\|_2$ continue to  decrease until
 the machine zero is eventually reached. One can also verify that the \emph{final} 
 discretization errors scale approximately as $8^2$ and $8^4$
 respectively for linear and cubic Splines, as $N$ is increased from
 $128$ to $1024$.
 
 Most interestingly, the \emph{iterative performance} depends very weakly
 on the problem size $N$, for both the Cartesian and the cylindrical
 cases. Moreover, the multigrid  seems to be less efficient when linear Splines are used for
 the problem discretization. This iterative performance can be further improved by
 increasing the \emph{iteration parameters} $\nu_1$, $\nu_2$ and $\mu$,
 as shown in Table~\ref{tab:improv}. One can also observe in the same
 table that the Jacobi relaxation is systematically less efficient than Gauss
 Seidel relaxation.
 
 \begin{table}
 \centering
 \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
  \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline
    & \multicolumn{4}{c|}{ $N=128$}            
    & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
 $V$-cycle  & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
            $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
    0 & 6.219E-02 &       & 7.164E-04 &       & 2.210E-02 &       & 7.164E-04 &       \\
    1 & 2.169E-02 &  0.35 & 5.880E-05 &  0.08 & 9.699E-03 &  0.44 & 3.622E-05 &  0.05 \\
    2 & 3.801E-03 &  0.18 & 7.806E-06 &  0.13 & 1.790E-03 &  0.18 & 1.965E-06 &  0.05 \\
    3 & 5.061E-04 &  0.13 & 3.666E-06 &  0.47 & 2.923E-04 &  0.16 & 1.583E-07 &  0.08 \\
    4 & 6.762E-05 &  0.13 & 3.564E-06 &  0.97 & 4.055E-05 &  0.14 & 6.197E-08 &  0.39 \\
    5 & 8.902E-06 &  0.13 & 3.585E-06 &  1.01 & 5.586E-06 &  0.14 & 5.655E-08 &  0.91 \\
    6 & 1.199E-06 &  0.13 & 3.589E-06 &  1.00 & 7.122E-07 &  0.13 & 5.622E-08 &  0.99 \\
    7 & 1.585E-07 &  0.13 & 3.590E-06 &  1.00 & 9.815E-08 &  0.14 & 5.620E-08 &  1.00 \\
    8 & 2.089E-08 &  0.13 & 3.590E-06 &  1.00 & 1.320E-08 &  0.13 & 5.619E-08 &  1.00 \\
    9 & 2.746E-09 &  0.13 & 3.590E-06 &  1.00 & 1.887E-09 &  0.14 & 5.619E-08 &  1.00 \\
   10 & 3.741E-10 &  0.14 & 3.590E-06 &  1.00 & 2.533E-10 &  0.13 & 5.619E-08 &  1.00 \\
 \hline \hline
 \multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline
    & \multicolumn{4}{c|}{ $N=128$}            
    & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
 $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
            $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
    0 & 6.187E-02 &       & 7.164E-04 &       &  2.209E-02 &       & 7.164E-04 &       \\
    1 & 1.948E-04 &  0.00 & 1.893E-06 &  0.00 &  1.685E-05 &  0.00 & 4.292E-08 &  0.00 \\
    2 & 4.316E-06 &  0.02 & 3.927E-09 &  0.00 &  1.241E-07 &  0.01 & 7.156E-11 &  0.00 \\
    3 & 1.554E-07 &  0.04 & 2.374E-09 &  0.60 &  4.184E-09 &  0.03 & 6.198E-13 &  0.01 \\
    4 & 5.750E-09 &  0.04 & 2.373E-09 &  1.00 &  1.560E-10 &  0.04 & 5.635E-13 &  0.91 \\
    5 & 2.153E-10 &  0.04 & 2.373E-09 &  1.00 &  5.912E-12 &  0.04 & 5.635E-13 &  1.00 \\
    6 & 8.122E-12 &  0.04 & 2.373E-09 &  1.00 &  2.258E-13 &  0.04 & 5.635E-13 &  1.00 \\
    7 & 3.079E-13 &  0.04 & 2.373E-09 &  1.00 &  8.777E-15 &  0.04 & 5.635E-13 &  1.00 \\
    8 & 1.173E-14 &  0.04 & 2.373E-09 &  1.00 &  1.758E-15 &  0.20 & 5.635E-13 &  1.00 \\
    9 & 4.489E-16 &  0.04 & 2.373E-09 &  1.00 &  1.709E-15 &  0.97 & 5.635E-13 &  1.00 \\
   10 & 9.571E-17 &  0.21 & 2.373E-09 &  1.00 &  1.761E-15 &  1.03 &
   5.635E-13 &  1.00 \\ \hline
 \end{tabular}
 \caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
   for a \emph{Cartesian} problem with $k=10$ and $\sigma=0$,
   discretized on a grid with $N=128$ and $1024$ intervals, using
   linear and cubic B-splines. For 
   both grid sizes, a total of 6 grid levels were considered.}
 \label{tab:cartesian1}
 \end{table}
 
 \begin{table}
 \centering
 \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
  \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline
    & \multicolumn{4}{c|}{ $N=128$}            
    & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
 $V$-cycle  & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
            $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
    0 & 1.789E+01 &           & 9.354E-02 &      & 6.400E+00 &      & 9.354E-02 &       \\
    1 & 3.373E+00 &      0.19 & 3.068E-03 & 0.03 & 1.826E+00 & 0.29 & 3.036E-03 & 0.03  \\
    2 & 4.895E-01 &      0.15 & 8.064E-04 & 0.26 & 3.133E-01 & 0.17 & 1.624E-04 & 0.05  \\
    3 & 6.160E-02 &      0.13 & 6.704E-04 & 0.83 & 4.581E-02 & 0.15 & 1.411E-05 & 0.09  \\
    4 & 8.013E-03 &      0.13 & 6.811E-04 & 1.02 & 5.959E-03 & 0.13 & 1.062E-05 & 0.75  \\
    5 & 9.871E-04 &      0.12 & 6.844E-04 & 1.00 & 8.098E-04 & 0.14 & 1.069E-05 & 1.01  \\
    6 & 1.283E-04 &      0.13 & 6.847E-04 & 1.00 & 1.048E-04 & 0.13 & 1.070E-05 & 1.00  \\
    7 & 1.613E-05 &      0.13 & 6.847E-04 & 1.00 & 1.504E-05 & 0.14 & 1.070E-05 & 1.00  \\
    8 & 2.097E-06 &      0.13 & 6.847E-04 & 1.00 & 2.050E-06 & 0.14 & 1.070E-05 & 1.00  \\
    9 & 2.639E-07 &      0.13 & 6.847E-04 & 1.00 & 3.008E-07 & 0.15 & 1.070E-05 & 1.00  \\
   10 & 3.500E-08 &      0.13 & 6.847E-04 & 1.00 & 4.074E-08 & 0.14 & 1.070E-05 & 1.00  \\
 \hline \hline
 \multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline
    & \multicolumn{4}{c|}{ $N=128$}            
    & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
 $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
            $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
    0 & 1.768E+01 &      & 9.354E-02 &      & 6.399E+00 &      & 9.354E-02 &       \\
    1 & 4.243E-02 & 0.00 & 4.727E-05 & 0.00 & 4.975E-03 & 0.00 & 6.588E-06 & 0.00  \\
    2 & 1.378E-03 & 0.03 & 1.897E-06 & 0.04 & 7.835E-05 & 0.02 & 6.578E-09 & 0.00  \\
    3 & 4.773E-05 & 0.03 & 1.814E-06 & 0.96 & 2.797E-06 & 0.04 & 4.125E-10 & 0.06  \\
    4 & 2.174E-06 & 0.05 & 1.814E-06 & 1.00 & 1.041E-07 & 0.04 & 4.092E-10 & 0.99  \\
    5 & 4.816E-07 & 0.22 & 1.814E-06 & 1.00 & 3.935E-09 & 0.04 & 4.092E-10 & 1.00  \\
    6 & 1.942E-07 & 0.40 & 1.814E-06 & 1.00 & 1.499E-10 & 0.04 & 4.092E-10 & 1.00  \\
    7 & 8.887E-08 & 0.46 & 1.814E-06 & 1.00 & 5.757E-12 & 0.04 & 4.092E-10 & 1.00  \\
    8 & 4.449E-08 & 0.50 & 1.814E-06 & 1.00 & 2.517E-13 & 0.04 & 4.092E-10 & 1.00  \\
    9 & 2.377E-08 & 0.53 & 1.814E-06 & 1.00 & 1.360E-13 & 0.54 & 4.092E-10 & 1.00  \\
   10 & 1.328E-08 & 0.56 & 1.814E-06 & 1.00 & 1.384E-13 & 1.02 & 4.092E-10 & 1.00  \\ \hline
 \end{tabular}
 \caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
   for a one-dimensional \emph{cylindrical} problem with $m=22$ and
   $s=10$, discretized on a grid with
   $N=128$ and $1024$ intervals, using linear and cubic B-splines. For
   both grid sizes, a total of 6 grid levels were considered.}
 \label{tab:cylindrical1}
 \end{table}
 
 \begin{table}
 \centering
 \begin{tabular}{|c|c|c|c|c|}\hline
    & \multicolumn{2}{c|}{ Cartesian problem}            
    & \multicolumn{2}{c|}{ Cylindrical problem}  \\ \cline{2-5}
                        & $N=128$ &  $N=1024$ & $N=128$  &  $N=1024$\\ \hline
 $\nu_1=1, \nu_2=1, \mu=1$ & 0.13 &  0.14 &  0.13 &  0.14 \\
 $\nu_1=1, \nu_2=2, \mu=1$ & 0.08 &  0.08 (\emph{0.10}) &  0.08 &  0.08 (\emph{0.09}) \\
 $\nu_1=2, \nu_2=1, \mu=1$ & 0.08 &  0.08 &  0.08 &  0.08 \\
 $\nu_1=2, \nu_2=2, \mu=1$ & 0.04 &  0.04 (\emph{0.08}) &  0.02 &  0.03 (\emph{0.08})\\
 $\nu_1=1, \nu_2=1, \mu=2$ & 0.12 &  0.11 &  0.12 &  0.11 \\ \hline
 \end{tabular}
 \caption{The \emph{convergence factor} (averaged over
   the last 5 cycles) for different iteration parameters $\nu_1$, $\nu_2$ and
   $\mu$, using the linear Splines  for both Cartesian ($k=10$,
   $\sigma=0$) and cylindrical ($m=22$, $s=10$) problems. The last
   entry is usually called  
   a $W$-cycle while the first four designate a $V(\nu_1,\nu_2)$
   cycle. Gauss Seidel relaxation is used except for the results enclosed
   in parenthesis which are obtained with the Jacobi
   (weighted  with $\omega=2/3$) relaxation.}
 \label{tab:improv}
 \end{table}
 
 The next experiment is shown on Table~\ref{tab:fmg}, where two FMG$(\nu_1,\nu_2)$
 schemes are applied to the $m=22$, $s=10$ cylindrical problem with
 grid sizes up to $N=2048$. Note that the problem is solved to the level of
 discretization for $N\ge 128$ with FMG$(2,1)$ but not with FMG$(1,1)$. Solving the same
 problem with the $V(2,1)$ cycle required 3 iterations for all the values of
 $N$ shown. Since the cost of one FMG(2,1) is $\sim 2$ the cost
 of one $V(2,1)$ (see Appendix \ref{sec:cost}), it appears that FMG is
 more efficient for $N\ge 128$. 
 
 Finally, in all the cases shown here, the equality (\ref{eq:coarse_mat})
 is verified numerically, except for the cylindrical case with $m\neq 0$. This is
 expected since as noted earlier, the matrix elements $A_{i1}$ and
 $A_{1i}$ diverge unless $m=0$ in the cylindrical problem.
 
 \begin{table}
 \centering
 \begin{tabular}{|c|c|c|c|c|}\hline
 & \multicolumn{2}{c|}{FMG(1,1)}
 & \multicolumn{2}{c|}{FMG(2,1)} \\  \cline{2-5}
  $N$  & $\|e\|_2$  & $\|e\|_2/\|e\|_d$ & $\|e\|_2$  & $\|e\|_2/\|e\|_d$ \\ \hline
     4 & 1.011E-01 & 0.968 & 1.012E-01 & 0.969 \\
     8 & 7.781E-02 & 1.031 & 7.679E-02 & 1.018 \\
    16 & 3.332E-02 & 1.310 & 2.808E-02 & 1.104 \\
    32 & 1.516E-03 & 1.421 & 1.098E-03 & 1.030 \\
    64 & 5.168E-05 & 1.443 & 3.652E-05 & 1.019 \\
   128 & 2.012E-06 & 1.109 & 1.818E-06 & 1.002 \\
   256 & 1.125E-07 & 1.053 & 1.069E-07 & 1.001 \\
   512 & 6.819E-09 & 1.037 & 6.576E-09 & 1.000 \\
  1024 & 4.224E-10 & 1.032 & 4.093E-10 & 1.000 \\
  2048 & 2.634E-11 & 1.031 & 2.556E-11 & 1.000 \\
 \hline
 \end{tabular}
 \caption{The discretization errors $\|e\|_2$ obtained from a 
   FMG$(\nu_1,\nu_2)$  sweep with $\nu_0=1$ for different grid sizes $N$. The columns
   $\|e\|_2/\|e\|_d$ display their ratio with the discretization errors
   obtained from a \emph{direct} solver. The cylindrical problem with
   $m=22$ and $s=10$ using cubic Splines is considered here.} 
 \label{tab:fmg}
 \end{table}
 
 \section{Periodic Case}
 \subsection{Transfer operators}
 For periodic problems, we use \emph{periodic} Splines \cite{BSPLINES} which
 satisfy the periodic boundary condition
 $\Lambda^h_{i+N}(x)=\Lambda^h_i(x-Nh)$. As a result, both the expansion
 coefficients and the right hand sides are periodic
 with periodicity $N$ ($u^h_{i+N}=u^h_i$, $b^h_{i+N}=b^h_i$) and he rank
 of all matrices should be $N$ instead of $N+p$ as in the non-periodic case.
 
 The \emph{prolongation} matrix $\mathbf{P}^h_{2h}$ as given by
 (\ref{eq:prolongation}) are computed numerically and the results
 for $N=8$ are given below for linear, quadratic and cubic Splines.
 
 \begin{itemize}
 \item Linear Splines
   \begin{equation}
     \mathbf{P}^{h}_{2h} =
     \left(
     \begin{matrix}
       1   & 0   & 0   & 0   \\
       1/2 & 1/2 & 0   & 0   \\
       0   & 1   & 0   & 0   \\
       0   & 1/2 & 1/2 & 0   \\
       0   & 0   & 1   & 0   \\
       0   & 0   & 1/2 & 1/2 \\
       0   & 0   & 0   & 1   \\
       1/2 & 0   & 0   & 1/2 \\
     \end{matrix}\right)
   \end{equation}
 \item Quadratic Splines
   \begin{equation}
     \mathbf{P}^{h}_{2h} =
     \left(
     \begin{matrix}
       3/4 & 1/4 & 0   & 0   \\
       1/4 & 3/4 & 0   & 0   \\
       0   & 3/4 & 1/4 & 0   \\
       0   & 1/4 & 3/4 & 0   \\
       0   & 0   & 3/4 & 1/4 \\
       0   & 0   & 1/4 & 3/4 \\
       1/4 & 0   & 0   & 3/4 \\
       3/4 & 0   & 0   & 1/4 \\
     \end{matrix}\right)
   \end{equation}
 \item Cubic Splines
   \begin{equation}
     \mathbf{P}^{h}_{2h} =
     \left(
     \begin{matrix}
       1/2 & 1/2 & 0   & 0   \\
       1/8 & 3/4 & 1/8 & 0   \\
       0   & 1/2 & 1/2 & 0   \\
       0   & 1/8 & 3/4 & 1/8 \\
       0   & 0   & 1/2 & 1/2 \\
       1/8 & 0   & 1/8 & 3/4 \\
       1/2 & 0   & 0   & 1/2 \\
       3/4 & 1/8 & 0   & 1/8 \\
     \end{matrix}\right)
   \end{equation}
 \end{itemize}
 The restriction matrix is simply $\mathbf{R}^{2h}_h=
 (\mathbf{P}^{h}_{2h})^T$. Generalization for any other number of intervals $N$ should be
 straightforward.
 
 \subsection{Numerical Experiments}
 In order to test the grid transfer operators obtained above, the same
 second-order problem (\ref{eq:cartesian_problem}) but with the
 periodic boundary condition $u(x+1)=u(x)$ is considered. It should be
 noted that in that case, if $\sigma=0$, the problem is singular since
 the solution is not \emph{unique}! But we have observed that this
 problem can be avoided for a slightly non zero $\sigma$, 
 
 With $\sigma=0.01$ and $k=10$ and using linear and cubic Splines, we
 recover the same multigrid iterative performances shown in Table
 \ref{tab:cartesian1} obtained previously for non-periodic Dirichlet
 boundary conditions. Table \ref{tab:quad_splines} also shows similar
 iterative efficiencies for \emph{quadratic} non-periodic and periodic problems.
 
 The identity (\ref{eq:coarse_mat}) is numerically verified in all the
 cases considered.
 
 \begin{table}
 \centering
 \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
  \multicolumn{9}{|c|}{ Cartesian problem with quadratic splines} \\ \hline
    & \multicolumn{4}{c|}{ $N=128$}            
    & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
 $V$-cycle  & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
            $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
   0 & 6.203E-02 &      & 7.164E-04 &      & 2.209E-02 &      & 7.164E-04 &      \\
   1 & 8.114E-04 & 0.01 & 6.375E-06 & 0.01 & 1.003E-04 & 0.00 & 4.509E-07 & 0.00 \\
   2 & 1.891E-05 & 0.02 & 6.079E-08 & 0.01 & 1.769E-06 & 0.02 & 8.061E-10 & 0.00 \\
   3 & 1.103E-06 & 0.06 & 5.220E-08 & 0.86 & 7.018E-08 & 0.04 & 9.970E-11 & 0.12 \\
   4 & 8.148E-08 & 0.07 & 5.220E-08 & 1.00 & 5.620E-09 & 0.08 & 9.958E-11 & 1.00 \\
   5 & 6.368E-09 & 0.08 & 5.220E-08 & 1.00 & 4.772E-10 & 0.08 & 9.958E-11 & 1.00 \\
   6 & 4.969E-10 & 0.08 & 5.220E-08 & 1.00 & 4.101E-11 & 0.09 & 9.958E-11 & 1.00 \\
   7 & 3.874E-11 & 0.08 & 5.220E-08 & 1.00 & 3.548E-12 & 0.09 & 9.958E-11 & 1.00 \\
   8 & 3.081E-12 & 0.08 & 5.220E-08 & 1.00 & 3.081E-13 & 0.09 & 9.958E-11 & 1.00 \\
   9 & 2.489E-13 & 0.08 & 5.220E-08 & 1.00 & 2.690E-14 & 0.09 & 9.958E-11 & 1.00 \\
  10 & 1.986E-14 & 0.08 & 5.220E-08 & 1.00 & 3.212E-15 & 0.12 & 9.958E-11 & 1.00 \\
 \hline \hline
 \multicolumn{9}{|c|}{ Periodic problem with quadratic splines} \\ \hline
    & \multicolumn{4}{c|}{ $N=128$}            
    & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
 $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
            $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
   0 & 6.203E-02 &      & 7.164E-04 &      & 2.209E-02 &      & 7.164E-04 &      \\
   1 & 1.285E-03 & 0.02 & 1.294E-05 & 0.02 & 3.116E-04 & 0.01 & 5.862E-06 & 0.01 \\
   2 & 7.878E-05 & 0.06 & 6.569E-07 & 0.05 & 2.893E-05 & 0.09 & 1.626E-07 & 0.03 \\
   3 & 6.573E-06 & 0.08 & 6.511E-08 & 0.10 & 2.691E-06 & 0.09 & 5.631E-09 & 0.03 \\
   4 & 5.681E-07 & 0.09 & 5.224E-08 & 0.80 & 2.385E-07 & 0.09 & 2.400E-10 & 0.04 \\
   5 & 4.890E-08 & 0.09 & 5.219E-08 & 1.00 & 2.097E-08 & 0.09 & 9.997E-11 & 0.42 \\
   6 & 4.198E-09 & 0.09 & 5.219E-08 & 1.00 & 1.828E-09 & 0.09 & 9.958E-11 & 1.00 \\
   7 & 3.607E-10 & 0.09 & 5.219E-08 & 1.00 & 1.584E-10 & 0.09 & 9.958E-11 & 1.00 \\
   8 & 3.103E-11 & 0.09 & 5.219E-08 & 1.00 & 1.370E-11 & 0.09 & 9.958E-11 & 1.00 \\
   9 & 2.674E-12 & 0.09 & 5.219E-08 & 1.00 & 1.184E-12 & 0.09 & 9.958E-11 & 1.00 \\
  10 & 2.307E-13 & 0.09 & 5.219E-08 & 1.00 & 1.025E-13 & 0.09 & 9.958E-11 & 1.00 \\\hline
 \end{tabular}
 \caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
   for \emph{Cartesian} problem ($k=10$, $\sigma=0$) and
   \emph{periodic} problem ($k=10$, $\sigma=0.01$), discretized on a grid with
   $N=128$ and $1024$ intervals, using quadratic B-splines. For
   both grid sizes, a total of 6 grid levels were considered.}
 \label{tab:quad_splines}
 \end{table}
 
 \section{Conclusion}
 Using the variational principle, we have derived the expressions of
 the grid transfer matrices for Finite Elements using Splines of any
 order. It is found that:
 
 \begin{itemize}
 \item The grid transfer matrices do not depend of the geometries
   characterized by the Jacobian as defined in  $dV = x^\alpha dx$.
 \item The standard grid transfer operator used for first order finite
   difference (FD) discretization for Cartesian geometry is recovered when
   linear Spline finite elements (FE) are used.
 \item Applying these transfer matrices, we have solved Cartesian, 
   cylindrical as well as periodic one dimensional
   problems, and obtained essentially the same multigrid iterative performances
   as found for standard first order FD Cartesian problems. 
 \item No performance \emph{degradation} is observed when the order of Splines FE
   is increased from 1 to 3, or when the cylindrical geometry is considered.
 \end{itemize}
 
 For two dimensional problems, notice that for both Cartesian ($dV=dxdy$) and
 standard curvilinear geometries ($dV=r^\alpha drd\theta$), the
 Jacobian is \emph{separable}. Using this property, one can show that
 the two dimensional grid transfer consists of simply applying
 successively one dimensional grid transfer on each of the $x$ and
 $y$ (or $r$ and $\theta$) grids. With the solution
 $\mathbf{u}^h=[u^h_{ij}]$  and right hand side
 $\mathbf{b}^h=[b^h_{ij}]$ defined by
 \begin{equation}
   u(x,y) = \sum_{ij} u^h_{ij}\Lambda^h_i(x)\Lambda^h_j(y), \qquad
   b^h_{ij} = \int dx\Lambda^h_i(x)\int dy\Lambda^h_j(y)f(x,y), 
 \end{equation}
 the two dimension grid transfers can be expressed as (see Appendix \ref{sec:twod})
 \begin{equation}
   \begin{split}
   \mathbf{u}^h &= {_{x}\mathbf{P}^h_{2h}}\; \mathbf{u}^{2h}
                   \left(_{y}\mathbf{P}^h_{2h}\right)^T, \\    
   \mathbf{b}^{2h} &={_{x}\mathbf{R}^{2h}_{h}}\; \mathbf{b}^{h}
                   \left(_{y}\mathbf{R}^{2h}_{h}\right)^T. \\    
   \end{split}
 \end{equation}
 
 For more general curvilinear coordinates such as found in tokamak
 magnetic coordinates defined by $dV=J(s,\theta)dsd\theta$, we will
 assume that the grid transfer operators derived above are still
 applicable. The validity of this assumption will be the object of the
 next task.
 
 
 \appendix
 \section{Multigrid Cost Estimation}
 \label{sec:cost}
 Assuming that the \emph{coarsest} grid is fixed to $2$, the total
 number of grid levels $L$ is given by $N/2^{L-1}=2$ or $L=\log_2(N)$, where $N$
 is the number of intervals in the \emph{finest grid}. Since both
 \emph{relaxation} and intergrid transfer are proportional to the
 number of problem unknowns, the cost of the
 $V$-cycle can be estimated as:
 \begin{equation}
   \begin{split}
    \mbox{MG}(N) &= c\left[ (N+p)+(N/2+p) +\ldots + (N/2^{L-2}+p) \right] \\
        &= c \left[ 2N-4 +(L-1)p  \right] ,\\
   \end{split}
 \end{equation}
 where $p$ is the order of Splines used for the discretization. The FMG
 can then be deduced, assuming $\nu_0=1$ as
 \begin{equation}
   \begin{split}
     \mbox{FMG}(N) &= \mbox{MG}(N) +\mbox{MG}(N/2) + \ldots + \mbox{MG}(N/2^{L-2})\\
                   &= c\left[ 4N-8 +(L-1)(pL/2-4) \right] .\\
   \end{split}
 \end{equation}
 As expected a single FMG cycle (with $\nu_0=1$) costs about two  $V$-cycles.
 
 \section{Two dimensional Grid Transfer}
 \label{sec:twod}
 On the fine and the coarse grids, the problem solution $u(x,y)$ can be written
 as:
 \begin{equation*}
   u(x,y) = \sum_{i'j'} u^h_{i'j'}\Lambda^h_{i'}(x)\Lambda^h_{j'}(y) 
   = \sum_{i'j'} u^{2h}_{i'j'}\Lambda^{2h}_{i'}(x)\Lambda^{2h}_{j'}(y).
 \end{equation*}
 Projecting these two expansions on the two dimensional basis
 functions $\Lambda^h_{i}(x)\Lambda^h_{j}(y)$ yields
 
   \begin{gather*}
     \sum_{i'j'} u^h_{i'j'}
     \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^h_{i'}(x)}_{M^{h,h}_{ii'}}
     \underbrace{\int dy\Lambda^h_{j}(x)\Lambda^h_{j'}(y)}_{N^{h,h}_{jj'}}
     =  \sum_{i'j'} u^{2h}_{i'j'}
     \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^{2h}_{i'}(x)}_{M^{h,2h}_{ii'}}
     \underbrace{\int
       dy\Lambda^h_{j}(x)\Lambda^{2h}_{j'}(y)}_{N^{h,2h}_{jj'}} \\
      \Longrightarrow \quad
     \mathbf{M}^{h,h}\;\mathbf{u}^h\;\left(\mathbf{N}^{h,h}\right)^T =
     \mathbf{M}^{h,2h}\mathbf{u}^{2h}\left(\mathbf{N}^{h,2h}\right)^T \\
      \Longrightarrow \quad
     \mathbf{u}^h = \left(\mathbf{M}^{h,h}\right)^{-1}
     \mathbf{M}^{h,2h}\;\mathbf{u}^{2h}\;\left[\left(\mathbf{N}^{h,h}\right)^{-1}\mathbf{N}^{h,2h}\right]^T.
   \end{gather*}
 
 The right hand side can be written on the fine and coarse grids as
 \begin{equation*}
   \begin{split}
   b^{h}_{ij} = \int dx\Lambda^{h}_i(x)\int dy\Lambda^{h}_j(y)\;f(x,y)
   = \sum_{i'j'} M^{h,h}_{ii'}f^{h}_{i'j'}N^{h,h}_{jj'} \quad\Longrightarrow\quad
   \mathbf{b}^h &= \mathbf{M}^{h,h}\;\mathbf{f}^h\left(\mathbf{N}^{h,h}\right)^{T}, \\
   b^{2h}_{ij} = \int dx\Lambda^{2h}_i(x)\int dy\Lambda^{2h}_j(y)\;f(x,y)
   = \sum_{i'j'} M^{2h,h}_{ii'}f^{h}_{i'j'}N^{2h,h}_{jj'} \quad\Longrightarrow\quad
   \mathbf{b}^{2h} &= \mathbf{M}^{2h,h}\;\mathbf{f}^h\left(\mathbf{N}^{2h,h}\right)^{T}, \\
   \end{split}
 \end{equation*}
 where the expansion of $f(x,y)$ on the \emph{fine} mesh has been
 used. Elimination of $\mathbf{f}^h$ then yields
 \begin{equation*}
    \mathbf{b}^{2h} =
    \mathbf{M}^{2h,h}\left(\mathbf{M}^{h,h}\right)^{-1} \;
    \mathbf{b}^h\;
    \left[\mathbf{N}^{2h,h}\left(\mathbf{N}^{h,h}\right)^{-1} \right]^T.
 \end{equation*}
 
 \begin{thebibliography}{99}
   \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, 
     \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf}
   \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, 
     \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf}
   \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
     Multigrid Tutorial, Second Edition, Siam (2000)}.
 \end{thebibliography}
 \end{document}
 
 
diff --git a/multigrid/docs/multigrid_2d.tex b/multigrid/docs/multigrid_2d.tex
index a373364..43be13c 100644
--- a/multigrid/docs/multigrid_2d.tex
+++ b/multigrid/docs/multigrid_2d.tex
@@ -1,425 +1,425 @@
 %
 % @file multigrid_2d.tex
 %
 % @brief
 %
 % @copyright
 % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 % SPC (Swiss Plasma Center)
 %
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
 % the terms of the GNU Lesser General Public License as published by the Free
 % Software Foundation, either version 3 of the License, or (at your option)
 % any later version.
 %
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 %
 % You should have received a copy of the GNU Lesser General Public License
 % along with this program. If not, see <https://www.gnu.org/licenses/>.
 %
 % @authors
 % (in alphabetical order)
 % @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 %
 \documentclass[a4paper]{article}
 \usepackage{linuxdoc-sgml}
 \usepackage{graphicx}
 \usepackage{hyperref}
 \usepackage{amsmath}
 
 \title{Multigrid for Finite Elements using Splines.}
 \author{Trach-Minh Tran, Stephan Brunner}
 \date{v0.1, January 2013}
 \abstract{A multigrid formulation for finite elements is
   derived, using variational principles. More specifically the grid
   transfer operators will be derived and tested in 2D Cartesian and
   cylindrical geometry for arbitrary order B-Splines.}
 
 \begin{document}
 \maketitle
 \tableofcontents
 
 \section{The Model Problems}
 \subsection{Cartesian Geometry}
 The following second-order boundary value problem is considered
 
 \begin{equation}
   \label{eq:cartesian_problem}
   \begin{split}
   -\left[\frac{\partial^2}{\partial x^2} 
   +\frac{\partial^2}{\partial y^2} \right] u(x,y)  &=  f(x,y) \qquad 0\le x\le 1,\quad 0\le y\le 1 \\
   u(0,y) = u(1,y) &= u(x,0) = u(x,1) = 0.
   \end{split}
 \end{equation}
 By choosing
 \begin{equation*}
   f(x,y) = \sin (\pi k_xx + \pi k_yy),
 \end{equation*}
 where $k_x$ and $k_y$ are integers, the solution of the BVP is simply
 \begin{equation*}
   u(x,y) = \frac{\sin(\pi k_xx+\pi k_yy)}{\pi^2(k_x^2+k_y^2)}.
 \end{equation*}
 Using a weak formulation on Eq.(\ref{eq:cartesian_problem}) and a grid
 of $N_x\times N_y$ intervals, one obtains the following discretized
 linear system
 \begin{equation}
   \sum_{i'=1}^{N_x+p}\sum_{j'=1}^{N_y+p}A_{iji'j'}u_{i'j'} = b_{ij},
   \qquad i=1,\ldots,N_x+p,\quad j=1,\ldots,N_y+p,
 \end{equation}
 where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion
 coefficients of the solution
 \begin{equation}
   u(x,y) = \sum_{i=1}^{N_x+p}\sum_{j=1}^{N_y+p}u_{ij}\Lambda_i(x)\Lambda_j(y),
 \end{equation}
 and the matrix $A$ and right hand side $b$ are determined from
 \begin{align}
   A_{iji'j'} &= \int^1_0\int^1_0 dxdy
   \left[\Lambda'_{i'}(x)\Lambda_{j'}(y)\Lambda'_i(x)\Lambda_j(y) + 
     \Lambda_{i'}(x)\Lambda'_{j'}(y)\Lambda_i(x)\Lambda'_j(y) \right],  \\
   b_{ij} &=  \int^1_0\int^1_0 dxdy\Lambda_i(x)\Lambda_j(y)f(x,y).
 \end{align}
 Note that using a Gauss quadrature with $\lceil(2p+1)/2\rceil$ points
 per interval to calculate the matrix $A$ would yield an exact integration.
 
 \subsection{Cylindrical Geometry}
 The following second-order boundary value problem is considered:
 \begin{equation}
   \label{eq:cylindrical_problem}
   \begin{split}
   -\left[\frac{1}{r}\frac{\partial}{\partial r}r\frac{\partial}{\partial r} 
   + \frac{1}{r^2} \frac{\partial^2}{\partial\theta^2}
    \right]u(r,\theta) &= f(r,\theta) \qquad 0\le r\le 1,\quad
   0 \le \theta < 2\pi \\
     u(1,\theta) &= 0, \\
   \end{split}
 \end{equation}
 By choosing
 \begin{equation*}
   f(r,\theta) = j^2_{ms} J_{m}(j_{ms}r)\cos(m\theta),
 \end{equation*}
 where $m$ is an integer and $j_{ms}$, the $s^{th}$ zero of $J_{m}$,
 the solution of this BVP is
 \begin{equation*}
 u(r,\theta) = J_{m}(j_{ms}r)\cos(m\theta).
 \end{equation*}
 Using a weak formulation on Eq.(\ref{eq:cylindrical_problem})and a grid
 of $N_r\times N_\theta$ intervals, one obtains the following discretized
 linear system
 \begin{equation}
   \sum_{i'=1}^{N_r+p}\sum_{j'=1}^{N_\theta}A_{iji'j'}u_{i'j'} = b_{ij},
   \qquad i=1,\ldots,N_r+p,\quad j=1,\ldots,N_\theta,
 \end{equation}
 where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion
 coefficients of the solution
 \begin{equation}
   u(r,\theta) = \sum_{i=1}^{N_r+p}\sum_{j=1}^{N_\theta}u_{ij}\Lambda_i(r)\Lambda_j(\theta),
 \end{equation}
 and the matrix $A$ and right hand side $b$ are determined from
 \begin{align}
   A_{iji'j'} &= \int^1_0\int^{2\pi}_0 rdrd\theta
   \left[\Lambda'_{i'}(r)\Lambda_{j'}(\theta)\Lambda'_i(r)\Lambda_j(\theta) + 
     \frac{1}{r^2}
     \Lambda_{i'}(r)\Lambda'_{j'}(\theta)\Lambda_i(r)\Lambda'_j(\theta)
     \right],  \\
   b_{ij} &=  \int^1_0\int^{2\pi}_0 rdrd\theta\Lambda_i(r)\Lambda_j(\theta)f(r,\theta).
 \end{align}
 Note that $A$ has an $1/r$ singularity in the
 integrand. For $m\neq0$, this should not be problematic since the
 converged solution behaves as $\sim r^m$ near $r=0$. The case $m=0$
 will be investigated numerically latter in this report, together withe
 the $m\neq 0$ case.
 
 \section{Restriction Operator}
 In the following, let us use the superscripts $h$ and $2h$ to denote
 quantities defined respectively on a \emph{fine} ($N_x\times N_y$
 or $N_r\times N_\theta$) and a \emph{coarser} ($N_x/2\times N_y/2$
 or $N_r/2\times N_\theta/2$) grid.
 
 The two grid transfers required in the standard \emph{multigrid}
 \cite{MG1D,Briggs} are:
 \begin{enumerate}
 \item the \emph{restriction} of the right hand side: $\mathbf{b}^{h}
   \longrightarrow \mathbf{b}^{2h}$ and 
 \item the \emph{prolongation} of the solution: $\mathbf{u}^{2h}
   \longrightarrow \mathbf{u}^{h}$.
 \end{enumerate}
 
 Noting that the basis functions $\Lambda^{2h}_i(x)$, which are \emph{piecewise}
 $C^{p-1}$ polynomials with \emph{breaks} on the \emph{coarse} grid
 points $x^{2h}_k=(2h)k$ can be also considered as \emph{piecewise}
 $C^{p-1}$ polynomials with \emph{breaks} on the \emph{fine} grid
 $x^h_k=kh$, they can be expressed \emph{uniquely} as a linear
 combination of the \emph{fine} grid basis functions:
 
 \begin{equation}
   \label{eq:basis_transf}
   \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}c_{ii'}\Lambda^h_{i'}(x), \quad
   i=1,\ldots,N/2+p.
 \end{equation}
 The (rectangular) matrix $c_{ii'}$ can be identified as the
 one-dimensional \emph{restriction}  $\mathbf{R}$ since
 \begin{equation*}
   b^{2h}_i = \int_0^1 dx f(x)\Lambda^{2h}_i(x) =
   \sum^{N+p}_{i'=1}c_{ii'}\;b^h_{i'} =
   \sum^{N+p}_{i'=1}R_{ii'}\;b^h_{i'}.
 \end{equation*}
 
 It can be computed by simply projecting Eq.(\ref{eq:basis_transf})
 on the fine grid basis function $\Lambda^h_{j}(x)$ \cite{MG1D}:
 \begin{equation}
   \sum^{N+p}_{i'=1}R_{ii'}\underbrace{\int_0^1 dx
   \Lambda^h_{i'}(x)\Lambda^h_{j}(x)}_{M^h_{i'j}} = \underbrace{\int_0^1 dx
   \Lambda^{2h}_i(x)\Lambda^h_{j}(x)}_{M^{2h,h}_{i'j}} \Longrightarrow
   \mathbf{R}=\mathbf{M}^{2h,h}\cdot(\mathbf{M}^{h})^{-1}.
 \end{equation}
 
 It should be stressed that the representation for $\Lambda^{2h}_i(x)$
 in  Eq.(\ref{eq:basis_transf}) is \emph {unique}. This is
 checked by verifying that the same matrix $R_{ii'}$ is obtained using
 for example the \emph{collocation} methods. One such method, which is used for
 this check is detailed in Appendix \ref{sec:colloc}. The calculated
 grid transfer matrices for linear, quadratic and cubic periodic and
 non-periodic Splines are given in \cite{MG1D}. 
 
 Denoting the restriction on $x$ and $y$ respectively
 by $\mathbf{R}^x$ and $\mathbf{R}^y$, the two-dimensional restriction of $b^h_{ij}$ is
 defined as
 \begin{equation*}
    b^{2h}_{ij} = \int_0^1\int_0^1 dxdy f(x,y)\Lambda^{2h}_i(x)\Lambda^{2h}_j(y) =
   \sum^{N+p}_{i'=1}\sum^{N+p}_{j'=1}R^x_{ii'}R^y_{jj'}b^h_{i'j'},
 \end{equation*}
 and thus
 \begin{equation}
   \label{eq:restriction}
   \boxed{\mathbf{b}^{2h} = \mathbf{R}^x \cdot \mathbf{b}^{h}
   \cdot (\mathbf{R}^y)^T.}
 \end{equation}
 
 \section{Prolongation Operator}
 Using Eq.(\ref{eq:basis_transf}) (with $c_{ii'}=R_{ii'}$), the
 solution at the coarse grid can be expressed as
 \begin{equation*}
   u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x) = 
   \sum_{i'=1}^{N+p}\left[\sum_{i=1}^{N/2+p} R_{ii'}u^{2h}_{i}\right]
   \Lambda^h_{i'}(x) =
   \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} (R)^T_{i'i}u^{2h}_{i}\right]}_{\tilde{u}^h_{i'}}
   \Lambda^h_{i'}(x),
 \end{equation*}
 from which one obvious choice for the \emph{prolongation} operator would be
 \begin{equation}
   \mathbf{P} = \mathbf{R}^T = (\mathbf{M}^{h})^{-1}\cdot\mathbf{M}^{h,2h}.
 \end{equation}
 
 Generalization to a two-dimensional prolongation is obtained as
 follows, where summation over repeated indices is assumed:
 \begin{equation*}
   u^{2h}(x,y) = u^{2h}_{ij}\Lambda^{2h}_{i}(x)\Lambda^{2h}_{j}(y) =
   \left[R^x_{ii'}u^{2h}_{ij}R^y_{jj'}\right]\Lambda^{h}_{i'}(x)\Lambda^{h}_{j'}(y)
 \end{equation*}
 which leads to the prolonged solution $\tilde{\mathbf{u}}^{h}$ given by
 \begin{equation}
   \label{eq:prolongation}
   \boxed{\tilde{\mathbf{u}}^{h} = \mathbf{P}^x \cdot \mathbf{u}^{2h} \cdot
   (\mathbf{P}^y)^T .}
 \end{equation}
 
 It should be noted here that, while the restricted right hand side
 $\mathbf{b}^{2h}$ as defined in Eq.(\ref{eq:restriction}) is
 \emph{exactly identical} to the assembled right hand side, the
 prolonged solution $\tilde{\mathbf{u}}^{h}$ defined in
 Eq.(\ref{eq:prolongation}) is just a
 representation of $u^{2h}(x,y)$ on the fine mesh and \emph{not} the
 solution $u^h(x,y)$ which can only be obtained by solving the problem
 on the fine mesh!
 
 \section{Numerical Experiments}
 The multigrid performance can be characterized by looking at the
 convergence of the residual Euclidean norm for the linear system 
 $\mathbf{A}\mathbf{u}=\mathbf{b}$:
 
 \begin{equation}
 \label{eq:resid}
 \|\mathbf{r}\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2.
 \end{equation}
 When the \emph{exact} solution $u(x,y)$ is known, the \emph{discretization error}
 can defined as
 \begin{equation}
 \label{eq:discerr}
   \|e\|_2 = \sqrt{\int dV\left[\sum_{ij}u_{ij}\Lambda_{ij}(x,y)-u(x,y)\right]^2}
 \end{equation}
 and computed using a Gauss quadrature. Note that for Splines of order
 $p$, $\|e(x,y)\|_2(h)$ converges to zero as $O(h^{p+1})$. 
 
 \subsection{Cartesian Geometry}
 The multigrid performances for varying problem sizes are displayed in
 Fig.(\ref{fig:linear_mg2d}) for linear Splines and
 Fig.(\ref{fig:cubic_mg2d}) for cubic Splines. They show that the number
 of iterations required for convergence (abount 3 for both linear and cubic
 Splines) is insensitive to the problem sizes. Compared to direct
 methods, the multigrid should scale much better for large problem
 sizes, as indicated in Table~\ref{tab:comparison1}. For this model
 problem, using cubic Splines seems to converge 
 slightly faster than linear Splines!
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{linear_mg2d}
   \caption{Performance of the multigrid $V(2,1)$ scheme using a
     Gauss-Seidel relaxation and \emph{linear Splines} for different
     problem sizes. The size of the \emph{coarsest} grid  is $2\times 2$.}
   \label{fig:linear_mg2d}
 \end{figure}
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{cubic_mg2d}
   \caption{Performance of the multigrid $V(2,1)$ scheme using a
     Gauss-Seidel relaxation and \emph{cubic Splines} for different
     problem sizes. The size of the \emph{coarsest} grid  is $2\times 2$.}
   \label{fig:cubic_mg2d}
 \end{figure}
 
 \begin{table}[htb]
   \centering
   \begin{tabular}{|c|c|c|c|c|c|}\hline
     & \multicolumn{2}{c|}{Linear Splines}
     & \multicolumn{2}{c|}{Cubic Splines} \\ \hline
 $N$ & $V(2,1)$ & Direct & $V(2,1)$ & Direct \\ \hline
  16 & 8.844E-04 & 2.051E-03 & 2.653E-03 & 3.970E-03 \\
  32 & 1.661E-03 & 5.345E-03 & 4.983E-03 & 1.540E-02 \\
  64 & 5.766E-03 & 2.054E-02 & 1.730E-02 & 7.492E-02 \\
 128 & 2.347E-02 & 3.288E-01 & 7.042E-02 & 1.060E+00 \\
 \hline
   \end{tabular}
   \caption{Times (in seconds) used by a the \emph{direct sparse} solver MUMPS-4.10.0
     for different problem sizes versus the times used by \emph{three}
     multigrid $V(2,1)$ cycles. The Intel Fortran-13.0 compiler is used on an Intel
     i7 platform.}
 \label{tab:comparison1}
 \end{table}
 
 The effects of the relaxation parameters $\nu_1,\nu_2$ on the
 multigrid performnace (Fig.(\ref{fig:cubic_mg2d_relax})) indicates
 that only a few relaxations are sufficient to achieve a good multigrid
 performance. Further analysis of the computational cost is required
 however to determine the \emph{optimal} $\nu_1,\nu_2$.
 
 Finally, the effects of the number of grid levels are analyzed in
 Fig.(\ref{fig:cubic_mg2d_levels}). In addition to the computational
 cost (see Table~\ref{tab:comparison2}), the memory required for the
 \emph{direct  solver} at the coarsest grid level should be taken into
 account for  the choice of the optimal number of grid levels,
 especially for very large problems.
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{cubic_mg2d_relax}
   \caption{Effect of the number of the relaxation sweeps $\nu_1,\nu_2$
     on the performance of the multigrid $V(\nu_1,\nu_2)$-cycle for
     \emph{Cubic Splines}. The finest grid has $128\times 128$
     intervals.}
   \label{fig:cubic_mg2d_relax}
 \end{figure}
 
 \begin{figure}
   \centering
   \includegraphics[angle=0,width=\hsize]{cubic_mg2d_levels}
   \caption{Effect of the number grid levels
     on the performance of the multigrid $V(2,1)$-cycle for \emph{Cubic
       Splines}. The finest grid has $128\times 128$ intervals.}
   \label{fig:cubic_mg2d_levels}
 \end{figure}
 
 \begin{table}[htb]
   \centering
   \begin{tabular}{|c|c|c|c|}\hline
 Number of levels & $V(1,0)$ &  $V(1,1)$ & $V(2,1)$ \\ \hline
   2 & 3.386E-02 & 3.881E-02 & 4.031E-02 \\
   3 & 2.923E-02 & 3.398E-02 & 3.605E-02 \\
   4 & 2.880E-02 & 3.275E-02 & 3.595E-02 \\
   7 & 2.912E-02 & 3.236E-02 & 3.566E-02 \\
 \hline
   \end{tabular}
   \caption{Effects of the times in seconds used per $V$-cyclefor
     different number of grid levels and relaxation paramters for a
     $128\times 128$ problem. The Intel Fortran-13.0 compiler is used on an Intel
     i7 platform.}
 \label{tab:comparison2}
 \end{table}
 
 
 
 \subsection{Cylindrical Geometry}
 
 \newpage
 \appendix
 
 \section{Grid transfer matrix by collocation}
 \label{sec:colloc}
 Let first consider the \emph{periodic case}. Denoting $N$ as the
 number of intervals of the fine grid, the \emph{periodic} Spline basis
 functions on the \emph{coarse} grid $\Lambda^{2h}_i$ can be expressed
 as linear combinations of the \emph{fine} grid Spline basis functions
 as:
 \begin{equation}
   \Lambda^{2h}_i(x) = \sum^{N}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad
   i=1,\ldots,N/2.
 \end{equation}
 For any given $i$, the coefficients $R_{ii'}$ can be calculated by
 expressing the relation above on exactly $N$ points on the $x$-grid. For
 \emph{odd} Spline order $p$, these \emph{collocation} (or
 interpolating) points can be chosen as the \emph{break} points of the fine
 grid $x^h_k,\quad k=0,\ldots,N-1$. For \emph{even} values of $p$, the
 collocation points should be $x^h_{k+1/2}=(x^h_{k}+x^h_{k+1})/2$ in
 order to obtain a non-singular linear system of equations 
 \cite{BSPLINES}. The resulting system of equations to solve for
 $R_{ii'}$ are given below: 
 \begin{equation}
   \begin{split}
     p\mbox{ odd}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_k)\,R_{ii'} =
     \Lambda^{2h}_i(x^h_k), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2, \\
     p\mbox{ even}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_{k+1/2})\,R_{ii'} =
     \Lambda^{2h}_i(x^h_{k+1/2}), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2.\\
   \end{split}
 \end{equation}
 
 For \emph{non-periodic} Splines, there are $N+p$ and
 $N/2+p$ basis functions respectively on the fine and coarse grid:
 \begin{equation}
   \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad
   i=1,\ldots,N/2+p.
 \end{equation}
 This implies that for any given $\Lambda^{2h}_i$, $N+p$ conditions
 are required to determined the $N+p$ terms of row $i$ of the matrix $R_{ii'}$. For odd $p$,
 $N+1$ collocation points $x_k,\quad k=0,\ldots,N$ can be used with the
 missing $p-1$ equations obtained by expressing all the $(p-1)/2$ derivatives
 of $\Lambda^{2h}_i(x)$ at the end points $x_0$ and $x_N$:
 \begin{equation}
  \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) =
  \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x)
  , \quad \alpha=1,\dots,\frac{p-1}{2} \quad (\mbox{$p$ odd}).
 \end{equation}
 For \emph{even} $p$, in addition to the $N$ relations obtained with
 the collocation points $x_{k+1/2}$ (as in the \emph{periodic} case),
 the missing $p$ conditions can be obtained by expressing $\Lambda^{2h}_i$
 and its derivatives up to $p/2-1$ at the end points $x_0$ and $x_N$: 
 \begin{equation}
  \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) =
  \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x)
  , \quad \alpha=0,\dots,\frac{p}{2}-1 \quad (\mbox{$p$ even}).
 \end{equation}
 
 \begin{thebibliography}{99}
   \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ 
     \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf}
   \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
     Multigrid Tutorial, Second Edition, Siam (2000)}.
   \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, 
     \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf}
   \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, 
     \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf}
 \end{thebibliography}
 
 \end{document}
diff --git a/multigrid/src/CMakeLists.txt b/multigrid/src/CMakeLists.txt
index 888ffc3..7530640 100644
--- a/multigrid/src/CMakeLists.txt
+++ b/multigrid/src/CMakeLists.txt
@@ -1,92 +1,92 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 project(multigrid_tests)
 
 if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
 #  set(CMAKE_Fortran_FLAGS_RELEASE
 #    "${CMAKE_Fortran_FLAGS_RELEASE} -profile-functions -profile-loops=outer"
 #    )
   set(CMAKE_Fortran_FLAGS_DEBUG
     "${CMAKE_Fortran_FLAGS_DEBUG} -fpe0"
     )
 endif()
 
 set(MG_TESTS
   transfer1d
   test_relax
   test_mg
   test_mgp
   test_csr
   two_grid
   test_mg2d
   test_relax2d
   test_transf2d
   transfer1d_col
   test_relax2d_cyl
   test_transf2d_cyl
   test_mg2d_cyl
 )
 
 foreach(test ${MG_TESTS})
   add_executable(${test} ${test}.f90)
   target_link_libraries(${test} bsplines)
 endforeach()
 
 add_executable(poisson_fd poisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
 target_link_libraries(poisson_fd bsplines)
 set(TESTS ${TESTS} poisson_mg)
 
 add_executable(partition partition.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
 target_link_libraries(partition bsplines)
 
 # Fail to compile with crayftn 4.0.46 on ROSA/DAINT
 add_executable(test_stencil test_stencil.f90 stencil_mod.f90 gvector_mod.f90)
 target_link_libraries(test_stencil bsplines)
 
 add_executable(test_stencilg test_stencilg.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
 target_link_libraries(test_stencilg bsplines)
 
 # Fail to compile with crayftn 4.0.46 on ROSA/DAINT
 add_executable(test_jacobi test_jacobi.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
 target_link_libraries(test_jacobi bsplines)
 
 add_executable(test_jacobig test_jacobig.f90  fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
 target_link_libraries(test_jacobig bsplines)
 
 add_executable(ppoisson_fd ppoisson_fd.f90  fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
 target_link_libraries(ppoisson_fd bsplines)
 
 add_executable(test_gvec1d test_gvec1d.f90)
 target_link_libraries(test_gvec1d bsplines)
 
 add_executable(test_intergrid0 test_intergrid0.f90)
 target_link_libraries(test_intergrid0 bsplines)
 
 add_executable(test_intergrid1 test_intergrid1.f90 parmg_mod.f90 gvector_mod.f90 stencil_mod.f90)
 target_link_libraries(test_intergrid1 bsplines)
 
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
diff --git a/multigrid/src/Makefile b/multigrid/src/Makefile
index 5748f81..235bb03 100644
--- a/multigrid/src/Makefile
+++ b/multigrid/src/Makefile
@@ -1,136 +1,136 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 MPIF90 = mpif90
 LD = $(MPIF90)
 
 # F90FLAGS = -I$(HOME)/include/O -I$(PPUTILS2)
 # LDFLAGS = -L$(HOME)/lib/O -L${HDF5}/lib
 F90FLAGS = -I$(FUTILS)/include -I$(BSPLINES)/include
 LDFLAGS = -mkl=cluster -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib
 
 MODS = gvector_mod.o stencil_mod.o
 LIBS = $(MODS) -lbsplines -lpppack -lpputils2 -lfutils \
        -lhdf5_fortran -lhdf5 -lz
 
 ifdef MKL
 SPBLAS = -DMKL
 endif
 
 ifdef MUMPS
 F90FLAGS += -I$(MUMPS)/include
 LDFLAGS += -L$(MUMPS)/lib
 LIBS += $(MUMPSLIBS)
 endif
 
 all:	 transfer1d test_relax test_mg test_mgp test_csr two_grid \
          test_mg2d test_relax2d test_transf2d transfer1d_col \
          test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl poisson_fd
 
 .SUFFIXES:
 .SUFFIXES: .o .f90
 
 .f90.o:
 	$(MPIF90) $(F90FLAGS) -c $<
 
 partition: partition.o
 	$(LD) $(LDFLAGS) -o $@ $< parmg_mod.o $(MODS) -lpputils2 -lfutils \
        -lhdf5_fortran -lhdf5 -lz
 
 transfer1d:	transfer1d.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_relax:	test_relax.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_mg:	test_mg.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_mgp:	test_mgp.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_csr:	test_csr.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 two_grid:	two_grid.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_mg2d:	test_mg2d.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_relax2d:	test_relax2d.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_transf2d:	test_transf2d.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 transfer1d_col:	transfer1d_col.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_relax2d_cyl:	test_relax2d_cyl.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_transf2d_cyl:	test_transf2d_cyl.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 test_mg2d_cyl:	test_mg2d_cyl.o
 	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
 
 poisson_fd:	poisson_fd.o
 	$(LD) $(LDFLAGS) -o $@ $< fdmat_mod.o $(LIBS)
 
 csr_mod.o: csr_mod.f90
 	    $(MPIF90) $(FPP) $(SPBLAS) $(F90FLAGS) -c csr_mod.f90
 
 parmg_mod.o: gvector_mod.o stencil_mod.o
 partition.o: parmg_mod.o
 transfer1d.o: $(MODS)
 test_relax.o: $(MODS)
 test_mg.o: $(MODS)
 test_mgp.o: $(MODS)
 test_csr.o: $(MODS)
 two_grid.o: $(MODS)
 test_mg2d.o: $(MODS)
 test_relax2d.o: $(MODS)
 test_transf2d.o: $(MODS)
 transfer1d_col.o: $(MODS)
 test_relax2d_cyl.o: $(MODS)
 test_transf2d_cyl.o: $(MODS)
 test_mg2d_cyl.o: $(MODS)
 poisson_fd.o: fdmat_mod.o
 fdmat_mod.o: stencil_mod.o parmg_mod.o
 stencil_mod.o: gvector_mod.o
 parmg_mod.o: gvector_mod.o stencil_mod.o
 
 clean:
 	rm -f *.o *.mod *~ ../wk/*~ a.out lib
 
 distclean: clean
 	rm -f ../wk/*.h5 ../wk/fort.* *.eps \
 	   transfer1d test_relax test_mg test_mgp test_csr two_grid \
            test_mg2d test_relax2d test_transf2d transfer1d_col \
            test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl
 
 #include $(HOST).mk
diff --git a/multigrid/src/README_mod.txt b/multigrid/src/README_mod.txt
index 224e499..22b365f 100644
--- a/multigrid/src/README_mod.txt
+++ b/multigrid/src/README_mod.txt
@@ -1,91 +1,91 @@
 /**
  * @file README_mod.txt
  *
  * @brief
  *
  * @copyright
  * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
  * SPC (Swiss Plasma Center)
  *
- * spclibs is free software: you can redistribute it and/or modify it under
+ * SPClibs is free software: you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
  * Software Foundation, either version 3 of the License, or (at your option)
  * any later version.
  *
- * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  *
  * @authors
  * (in alphabetical order)
  * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
  */
 1) Module parmg
    ============
   - types:
       grid2_type
        INTEGER, DIMENSION(2)    :: s, e, s0, e0, npt_loc, npt
        REAL(rkind), ALLOCATABLE :: x(:), y(:)
        TYPE(gvector_2d)         :: f, v
        TYPE(stencil_2d)         :: fdmat, restrict_mat
 
    - module procedures:
        creat_grid
        coarse (1d, 2d)
        exchange (gvector)
        prolong  (gvector)
        restrict (gvector)
        jacobi
        get_resids
        init_restrict (gvector)
        disp (0d, 1d array of int)
        get_lmax
 
    - Uses
        gvector
        stencil
 
 2) Module stencil
    ==============
   - types:
         LOGICAL                  :: nluni
         INTEGER, DIMENSION(2)    :: ldim, gdim, s0, e0, s, e
         INTEGER                  :: npoints
         INTEGER, ALLOCATABLE     :: id(:,:)
         REAL(rkind), ALLOCATABLE :: val(:,:,:)
 
   - module procedures:
       init
       vmx
       laplacian
       putmat
 
   - operators:
       *: vmx
 
    - Uses
        gvector
 
 3) Module gvector
    ==============
   - types:
       gvector_2d
         INTEGER, DIMENSION(2)    :: s, e ! vector internal bounds
         INTEGER, DIMENSION(2)    :: g    ! ghost cell widths
         REAL(rkind), ALLOCATABLE :: val(:,:)
 
   - module procedures:
       constructor (gvector_2d)
       disp
       norm2 (serial, mpi)
       
   - operators:
       + : add_scal, add_vec
       - : minus_vec, substract_vec
       * : scale_left, scale_right
 
   - assignment:
       = : from_scal, from_vec 
diff --git a/multigrid/src/fdmat_mod.f90 b/multigrid/src/fdmat_mod.f90
index c925169..2df2be5 100644
--- a/multigrid/src/fdmat_mod.f90
+++ b/multigrid/src/fdmat_mod.f90
@@ -1,600 +1,600 @@
 !>
 !> @file fdmat_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE fdmat_mod
 !
   USE multigrid
   IMPLICIT NONE
 !
   INTERFACE fdmat
      MODULE PROCEDURE fdmat_stencil
      MODULE PROCEDURE fdmat_gen, fdmat_csr, fdmat_cds
   END INTERFACE fdmat
   INTERFACE ibc_fdmat
      MODULE PROCEDURE ibc_fdmat_stencil
      MODULE PROCEDURE ibc_fdmat_gen, ibc_fdmat_csr, ibc_fdmat_cds
   END INTERFACE ibc_fdmat
   INTERFACE ibc_rhs
      MODULE PROCEDURE ibc_rhs_g
   END INTERFACE ibc_rhs
 !
 CONTAINS
 !--------------------------------------------------------------------------------
   SUBROUTINE fdmat_stencil(grid, fdense, icrosst, mat)
 !
 !   Construct model GBS FD partitioned matrix
 !
     USE iso_fortran_env, ONLY : rkind => real64
     USE stencil, ONLY : stencil_2d
     USE parmg, ONLY   : grid2_type
 !
     TYPE(grid2_type)        :: grid
     REAL(rkind), INTENT(in) :: icrosst
     TYPE(stencil_2d)        :: mat
     INTERFACE
        FUNCTION fdense(x)
          USE iso_fortran_env, ONLY : rkind => real64
          REAL(rkind), INTENT(in) :: x(:)
          REAL(rkind)             :: fdense(SIZE(x))
        END FUNCTION fdense
     END INTERFACE
 !
     REAL(rkind) :: dx, dy
     REAL(rkind),ALLOCATABLE :: dense(:)
     REAL(rkind) :: stencil_arr(-1:1,-1:1), zdiag(-1:1,-1:1), corr
     INTEGER     :: nx, ny, i, j, k, d(2)
 !
 !   Grid properties
 !
     nx = grid%npt(1)-1
     ny = grid%npt(2)-1
     dx = grid%x(1)-grid%x(0)  ! Assume equidistant grid
     dy = grid%y(1)-grid%y(0)
     ALLOCATE(dense(0:nx))     ! electron densoty vary only along x
 !
 !   Stencil array
 !
     stencil_arr = 0.0d0
     zdiag   = 0.0d0
     corr    = 1.d0+icrosst**2/4.0d0
     stencil_arr(0,0)   = -2.0d0/dx/dx-2.0d0/dy/dy*corr
     stencil_arr(-1,0)  =  1.0/dx/dx
     stencil_arr(1,0)   =  1.0/dx/dx
     stencil_arr(0,-1)  =  1.0/dy/dy*corr
     stencil_arr(0,1)   =  1.0/dy/dy*corr
     stencil_arr(-1,-1) =  icrosst*1.0/4.0/dx/dy
     stencil_arr(1,-1)  =  icrosst*(-1.0/4.0/dx/dy)
     stencil_arr(-1,1)  =  icrosst*(-1.0/4.0/dx/dy)
     stencil_arr(1,1)   =  icrosst*(1.0/4.0/dx/dy)
     zdiag(0,0)     =  1.0d0
     dense(:) = fdense(grid%x(:))
 !
 !  Assemble the stencil by scanning local grid points
 !
     DO k=0,mat%npoints-1
        d(:) = mat%id(k,:)
        DO j=mat%s(2),mat%e(2)
           DO i=mat%s(1),mat%e(1)
              mat%val(i,j,k) = stencil_arr(d(1),d(2)) + &
                   &           zdiag(d(1),d(2))*dense(i)
           END DO
        END DO
     END DO
 !
     DEALLOCATE(dense)
   END SUBROUTINE fdmat_stencil
 !--------------------------------------------------------------------------------
   SUBROUTINE ibc_rhs_g(f, s0, e0, prb)
 !
 !   Impose BC on rhs
 !
     USE iso_fortran_env, ONLY : rkind => real64
     USE gvector, ONLY : gvector_2d
 !
     TYPE(gvector_2d), INTENT(inout) :: f
     CHARACTER(len=*), INTENT(in)    :: prb
     INTEGER, INTENT(in)             :: s0(2), e0(2)
     INTEGER :: s(2), e(2)
 !
     s = f%s
     e = f%e
 !
     IF(s(1).EQ.s0(1)) THEN
        IF(prb(1:1).EQ.'d') THEN    ! West face
           f%val(s(1),s(2):e(2)) = 0.0_rkind
        ELSE
           f%val(s(1),s(2):e(2)) = 0.5_rkind*f%val(s(1),s(2):e(2))
        END IF
     END IF
     IF(e(1).EQ.e0(1)) THEN 
        IF(prb(2:2).EQ.'d') THEN    ! East face
           f%val(e(1),s(2):e(2)) = 0.0_rkind
        ELSE
           f%val(e(1),s(2):e(2)) = 0.5_rkind*f%val(e(1),s(2):e(2))
        END IF
     END IF
     IF(s(2).EQ.s0(2)) THEN
        IF(prb(3:3).EQ.'d') THEN    ! South face
           f%val(s(1):e(1),s(2)) = 0.0_rkind
        ELSE
           f%val(s(1):e(1),s(2)) = 0.5_rkind*f%val(s(1):e(1),s(2))
        END IF
     END IF
     IF(e(2).EQ.e0(2)) THEN
        IF(prb(4:4).EQ.'d') THEN    ! North face
           f%val(s(1):e(1),e(2)) = 0.0_rkind
        ELSE
           f%val(s(1):e(1),e(2)) = 0.5_rkind*f%val(s(1):e(1),e(2))
        END IF
     END IF
   END SUBROUTINE ibc_rhs_g
 !--------------------------------------------------------------------------------
   SUBROUTINE ibc_fdmat_stencil(mat, prb)
 !
 !   Impose BC on matrix
 !
     USE iso_fortran_env, ONLY : rkind => real64
     USE stencil, ONLY : stencil_2d
 !
     TYPE(stencil_2d), INTENT(inout) :: mat
     CHARACTER(len=*), INTENT(in)    :: prb
 !
     INTEGER :: s0(2), e0(2), s(2), e(2)
 !
     s0 = mat%s0
     e0 = mat%e0
     s = mat%s
     e = mat%e
 !
 !   Neumann BC
 !   WARNING: Divide the stencil by 2 => should do the same for RHS!
 !
 !                       N
 !                   6---7---8
 !                   |   |   |
 !                W  4---0---5   E   Numbering of stencil
 !                   |   |   |
 !                   1---2---3
 !                       S
 !
     IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'n') THEN    ! West face
        mat%val(s(1),s(2):e(2),1) = 0.0_rkind
        mat%val(s(1),s(2):e(2),3) = 0.0_rkind
        mat%val(s(1),s(2):e(2),4) = 0.0_rkind
        mat%val(s(1),s(2):e(2),5) = 2.0d0*mat%val(s(1),s(2):e(2),5)
        mat%val(s(1),s(2):e(2),6) = 0.0_rkind
        mat%val(s(1),s(2):e(2),8) = 0.0_rkind
        mat%val(s(1),s(2):e(2),:) = 0.5_rkind*mat%val(s(1),s(2):e(2),:)
     END IF
     IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'n') THEN    ! East face
        mat%val(e(1),s(2):e(2),1) = 0.0_rkind
        mat%val(e(1),s(2):e(2),3) = 0.0_rkind
        mat%val(e(1),s(2):e(2),4) = 2.0d0*mat%val(e(1),s(2):e(2),4)
        mat%val(e(1),s(2):e(2),5) = 0.0_rkind
        mat%val(e(1),s(2):e(2),6) = 0.0_rkind
        mat%val(e(1),s(2):e(2),8) = 0.0_rkind
        mat%val(e(1),s(2):e(2),:) = 0.5_rkind*mat%val(e(1),s(2):e(2),:)
     END IF
     IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'n') THEN    ! South face
        mat%val(s(1):e(1),s(2),1) = 0.0_rkind
        mat%val(s(1):e(1),s(2),2) = 0.0_rkind
        mat%val(s(1):e(1),s(2),3) = 0.0_rkind
        mat%val(s(1):e(1),s(2),6) = 0.0_rkind
        mat%val(s(1):e(1),s(2),7) = 2.0d0*mat%val(s(1):e(1),s(2),7)
        mat%val(s(1):e(1),s(2),8) = 0.0_rkind
        mat%val(s(1):e(1),s(2),:) = 0.5_rkind*mat%val(s(1):e(1),s(2),:)
     END IF
     IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'n') THEN    ! North face
        mat%val(s(1):e(1),e(2),1) = 0.0_rkind
        mat%val(s(1):e(1),e(2),2) = 2.0d0*mat%val(s(1):e(1),e(2),2)
        mat%val(s(1):e(1),e(2),3) = 0.0_rkind
        mat%val(s(1):e(1),e(2),6) = 0.0_rkind
        mat%val(s(1):e(1),e(2),7) = 0.0_rkind
        mat%val(s(1):e(1),e(2),8) = 0.0_rkind
        mat%val(s(1):e(1),e(2),:) = 0.5_rkind*mat%val(s(1):e(1),e(2),:)
     END IF
 !
 !   Dirichlet BC
 !
     IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'d') THEN    ! West face
        mat%val(s(1),s(2):e(2),:) = 0.0_rkind
        mat%val(s(1),s(2):e(2),0) = 1.0_rkind
     END IF
     IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'d') THEN    ! East face
        mat%val(e(1),s(2):e(2),:) = 0.0_rkind
        mat%val(e(1),s(2):e(2),0) = 1.0_rkind
     END IF
     IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'d') THEN    ! South face
        mat%val(s(1):e(1),s(2),:) = 0.0_rkind
        mat%val(s(1):e(1),s(2),0) = 1.0_rkind
     END IF
     IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'d') THEN    ! North face
        mat%val(s(1):e(1),e(2),:) = 0.0_rkind
        mat%val(s(1):e(1),e(2),0) = 1.0_rkind
     END IF
   END SUBROUTINE ibc_fdmat_stencil
 !--------------------------------------------------------------------------------
   SUBROUTINE fdmat_gen(grid, fdense, icrosst, noinit)
 !
 !   Generic version
 !
     TYPE(grid2d), INTENT(inout)   :: grid
     DOUBLE PRECISION, INTENT(in)  :: icrosst
     LOGICAL, INTENT(in), OPTIONAL :: noinit
     INTERFACE
        FUNCTION fdense(x)
          DOUBLE PRECISION, INTENT(in) :: x(:)
          DOUBLE PRECISION :: fdense(SIZE(x))
        END FUNCTION fdense
     END INTERFACE
 !
     IF(ALLOCATED(grid%mata)) THEN
        CALL fdmat_csr(grid, fdense, icrosst, grid%mata, noinit)
     ELSE
        CALL fdmat_cds(grid, fdense, icrosst, grid%mata_cds, noinit)
     END IF
   END SUBROUTINE fdmat_gen
 !--------------------------------------------------------------------------------
   SUBROUTINE fdmat_cds(grid, fdense, icrosst, mat, noinit)
 !
 !  Construct FD matrix
 !
     TYPE(grid2d), INTENT(in)      :: grid
     DOUBLE PRECISION, INTENT(in)  :: icrosst
     TYPE(cds_mat), INTENT(inout)  :: mat
     LOGICAL, INTENT(in), OPTIONAL :: noinit
     INTERFACE
        FUNCTION fdense(x)
          DOUBLE PRECISION, INTENT(in) :: x(:)
          DOUBLE PRECISION :: fdense(SIZE(x))
        END FUNCTION fdense
     END INTERFACE
 !
     INTEGER :: n, nx, ny
     INTEGER :: kl, ku, k
     INTEGER :: ix, ix2, jx, iy, jy, iy2,  irow
     DOUBLE PRECISION :: lx, ly, dx, dy, mele
     DOUBLE PRECISION :: dense(0:grid%n(1))
     DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1)
     DOUBLE PRECISION :: corr
     LOGICAL :: run_init
     INTEGER, ALLOCATABLE :: dists(:)
 !--------------------------------------------------------------------------------
     run_init = .TRUE.
     IF(PRESENT(noinit)) run_init = .NOT.noinit
 !
 !   Grid properties
 !
     nx = grid%n(1)
     ny = grid%n(2)
     dx = grid%x(1) - grid%x(0)
     dy = grid%y(1) - grid%y(0)
     lx = grid%x(nx)
     ly = grid%y(ny)
     n = PRODUCT(grid%rank)  ! Rank of matrix
 !
 !   Stencil
 !
     stencil = 0.0d0
     zdiag = 0.0d0
 !
     corr = 1.d0+icrosst**2/4.0d0
     stencil(0,0)   = -2.0d0/dx/dx-2.0d0/dy/dy*corr
     stencil(-1,0)  =  1.0/dx/dx
     stencil(1,0)   =  1.0/dx/dx
     stencil(0,-1)  =  1.0/dy/dy*corr
     stencil(0,1)   =  1.0/dy/dy*corr
     stencil(-1,-1) =  icrosst*1.0/4.0/dx/dy
     stencil(1,-1)  =  icrosst*(-1.0/4.0/dx/dy)
     stencil(-1,1)  =  icrosst*(-1.0/4.0/dx/dy)
     stencil(1,1)   =  icrosst*(1.0/4.0/dx/dy)
     zdiag(0,0)     = 1.0d0
 !
 !   9-point stencil "diagonal storage"
 !
     kl=4
     ku=4
     ALLOCATE(dists(-kl:ku))
     DO iy2=-1,1
        DO ix2=-1,1
           k=3*iy2+ix2
           dists(k) = iy2*(nx+1) + ix2
        END DO
     END DO
 !
     IF(run_init) THEN 
        CALL init(n, dists, 1, mat)
     END IF
 !
 !   Assemble matrix by scanning all grid points
 !
     dense(:) = fdense(grid%x(:))
     DO iy=0,ny
        DO ix=0,nx
           irow = iy*(nx+1)+ix+1
           DO iy2=-1,1
              jy=iy+iy2
              IF(jy.GE.0 .AND. jy.LE.ny) THEN
                 DO ix2=-1,1
                    jx=ix+ix2
                    IF(jx.GE.0 .AND.jx.LE.nx) THEN
                       mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix)
                       k=3*iy2+ix2
                       mat%val(irow,k) = mele
                    END IF
                 END DO
              END IF
           END DO
        END DO
     END DO
 !
     DEALLOCATE(dists)
   END SUBROUTINE fdmat_cds
 !--------------------------------------------------------------------------------
   SUBROUTINE fdmat_csr(grid, fdense, icrosst, mat, noinit)
 !
 !  Construct FD matrix
 !
     TYPE(grid2d), INTENT(in)      :: grid
     DOUBLE PRECISION, INTENT(in)  :: icrosst
     TYPE(csr_mat), INTENT(inout)  :: mat
     LOGICAL, INTENT(in), OPTIONAL :: noinit
     INTERFACE
        FUNCTION fdense(x)
          DOUBLE PRECISION, INTENT(in) :: x(:)
          DOUBLE PRECISION :: fdense(SIZE(x))
        END FUNCTION fdense
     END INTERFACE
 !
     INTEGER :: n, nx, ny
     INTEGER :: ix, ix2, jx, iy, jy, iy2,  irow, icol
     DOUBLE PRECISION :: lx, ly, dx, dy, mele
     DOUBLE PRECISION :: dense(0:grid%n(1))
     DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1)
     DOUBLE PRECISION :: corr
     LOGICAL :: run_init
 !--------------------------------------------------------------------------------
     run_init = .TRUE.
     IF(PRESENT(noinit)) run_init = .NOT.noinit
 !
 !   Grid properties
 !
     nx = grid%n(1)
     ny = grid%n(2)
     dx = grid%x(1) - grid%x(0)
     dy = grid%y(1) - grid%y(0)
     lx = grid%x(nx)
     ly = grid%y(ny)
     n = PRODUCT(grid%rank)  ! Rank of matrix
 !
 !   Stencil
 !
     stencil = 0.0d0
     zdiag = 0.0d0
 !
     corr = 1.d0+icrosst**2/4.0d0
     stencil(0,0)   = -2.0d0/dx/dx-2.0d0/dy/dy*corr
     stencil(-1,0)  =  1.0/dx/dx
     stencil(1,0)   =  1.0/dx/dx
     stencil(0,-1)  =  1.0/dy/dy*corr
     stencil(0,1)   =  1.0/dy/dy*corr
     stencil(-1,-1) =  icrosst*1.0/4.0/dx/dy
     stencil(1,-1)  =  icrosst*(-1.0/4.0/dx/dy)
     stencil(-1,1)  =  icrosst*(-1.0/4.0/dx/dy)
     stencil(1,1)   =  icrosst*(1.0/4.0/dx/dy)
     zdiag(0,0)     = 1.0d0
 !
 !   Create CSR matrix
     IF(run_init) THEN 
        CALL init(n, 1, mat)
     END IF
 !
 !   Assemble matrix by scanning all grid points
 !
     dense(:) = fdense(grid%x(:))
     DO iy=0,ny
        DO ix=0,nx
           irow = numb(ix,iy)
           DO iy2=-1,1
              jy=iy+iy2
              IF(jy.GE.0 .AND. jy.LE.ny) THEN
                 DO ix2=-1,1
                    jx=ix+ix2
                    IF(jx.GE.0 .AND.jx.LE.nx) THEN
                       icol=numb(jx,jy)
                       mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix)
                       CALL putele(mat, irow, icol,mele)
                    END IF
                 END DO
              END IF
           END DO
        END DO
     END DO
 !--------------------------------------------------------------------------------
   CONTAINS
     INTEGER FUNCTION numb(ix,iy)
       INTEGER, INTENT(in) :: ix, iy
       INTEGER :: stride
       stride = grid%rank(1)
       numb = iy*stride + (ix+1)
     END FUNCTION numb
 !--------------------------------------------------------------------------------
   END SUBROUTINE fdmat_csr
 !++
   SUBROUTINE ibc_fdmat_gen(grid, prb)
 !
 !   Generic version
 !
     TYPE(grid2d), INTENT(inout)  :: grid
     CHARACTER(len=*), INTENT(in) :: prb
 !
     IF(ALLOCATED(grid%mata)) THEN
        CALL ibc_fdmat_csr(grid, grid%mata, prb)
     ELSE
        CALL ibc_fdmat_cds(grid, grid%mata_cds, prb)
     END IF
 !
   END SUBROUTINE ibc_fdmat_gen
 !++
   SUBROUTINE ibc_fdmat_csr(grid, mat, prb)
 !
 !   Impose BC
 !
     TYPE(grid2d), INTENT(in)     :: grid
     TYPE(csr_mat), INTENT(inout) :: mat
     CHARACTER(len=*), INTENT(in) :: prb
 !
     DOUBLE PRECISION :: arow(mat%rank)
     INTEGER :: nx, ny, nx1, ny1, n, iy, irow, irow1
 !--------------------------------------------------------------------------------  
     nx = grid%n(1)
     ny = grid%n(2)
     nx1=nx+1
     ny1=ny+1
     n = nx1*ny1
 !
 !  Dirichelt BC on West/East
 !
     IF(prb.EQ.'dddd') THEN
        DO irow=1,ny*nx1+1,nx1
           arow=0.0d0; arow(irow)=1.0d0
           CALL putrow(mat, irow, arow)
           irow1=irow+nx
           arow=0.0d0; arow(irow1)=1.0d0
           CALL putrow(mat, irow1, arow)
        END DO
 !
 !   Neumann on West/East
 !   WARNING: Divide the stencil by 2 => should do the same for RHS!
 !
     ELSE IF(prb.EQ.'nndd') THEN
        DO irow=1,ny*nx1+1,nx1
           iy = irow/nx1
           CALL getrow(mat, irow, arow)
           arow(irow+1) = 2.0d0*arow(irow+1)
           IF(iy.GT.0) arow(irow-nx) = 0.0d0
           IF(iy.LT.ny) arow(irow+nx+2) = 0.0d0
           arow(:) = 0.5d0*arow(:)
           CALL putrow(mat, irow, arow)
        END DO
        DO irow=nx1,n,nx1
           iy = irow/nx1
           CALL getrow(mat, irow, arow)
           arow(irow-1) = 2.0d0*arow(irow-1)
           IF(iy.GT.0) arow(irow-nx-2) = 0.0d0
           IF(iy.LT.ny) arow(irow+nx) = 0.0d0
           arow(:) = 0.5d0*arow(:)
           CALL putrow(mat, irow, arow)
        END DO
 !
     ELSE
        WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!'
        STOP
     END IF
 !
 !  Dirichlet BC on South/North sides
 !
     DO irow=1,nx1
        arow=0.0d0;  arow(irow)=1.0d0
        CALL putrow(mat, irow, arow)
     END DO
     DO irow1=ny*nx1+1,n
        arow=0.0d0;  arow(irow1)=1.0d0
        CALL putrow(mat, irow1, arow)
     END DO
 !--------------------------------------------------------------------------------
   END SUBROUTINE ibc_fdmat_csr
 !++
   SUBROUTINE ibc_fdmat_cds(grid, mat, prb)
 !
 !   Impose BC
 !
     TYPE(grid2d), INTENT(in)     :: grid
     TYPE(cds_mat), INTENT(inout) :: mat
     CHARACTER(len=*), INTENT(in) :: prb
 !
     INTEGER :: nx, ny, iy, irow
     INTEGER :: n, nx1, ny1
 !--------------------------------------------------------------------------------  
     nx = grid%n(1)
     ny = grid%n(2)
     nx1=nx+1
     ny1=ny+1
     n = nx1*ny1
 !
 !         2  ==   3  ==  4
 !         |       |      |
 !        -1  ==   0  ==  1
 !         |       |      |
 !        -4  ==  -3  == -2
 !
 !  Dirichelt BC on West/East
 !
     IF(prb.EQ.'dddd') THEN
        DO irow=1,ny*nx1+1,nx1
           mat%val(irow,:) = 0.0d0
           mat%val(irow,0) = 1.0d0
           mat%val(irow+nx,:) = 0.0d0
           mat%val(irow+nx,0) = 1.0d0
        END DO
 !
 !   Neumann on West/East
 !   WARNING: Divide the stencil by 2 => should do the same for RHS!
 !
     ELSE IF(prb.EQ.'nndd') THEN
        DO irow=1,ny*nx1+1,nx1
           iy = irow/nx1
           IF(iy.GT.0)  mat%val(irow,-2)=0.0d0
           IF(iy.LT.ny) mat%val(irow,+4)=0.0d0
           mat%val(irow,+1)=2.0d0*mat%val(irow,+1)
           mat%val(irow,:)=0.5d0*mat%val(irow,:)
        END DO
        DO irow=nx1,n,nx1
           iy = irow/nx1
           IF(iy.GT.0)  mat%val(irow,-4)=0.0d0
           IF(iy.LT.ny) mat%val(irow,+2)=0.0d0
           mat%val(irow,-1)=2.0d0*mat%val(irow,-1)
           mat%val(irow,:)=0.5d0*mat%val(irow,:)
        END DO
 !
     ELSE
        WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!'
        STOP
     END IF
 !
 !  Dirichlet BC on South/North sides
 !
     DO irow=1,nx1
        mat%val(irow,:) = 0.0d0
        mat%val(irow,0) = 1.0d0
     END DO
     DO irow=ny*nx1+1,n
        mat%val(irow,:) = 0.0d0
        mat%val(irow,0) = 1.0d0
     END DO
 !--------------------------------------------------------------------------------
   END SUBROUTINE ibc_fdmat_cds
 !++
 END MODULE fdmat_mod
diff --git a/multigrid/src/gvector_mod.f90 b/multigrid/src/gvector_mod.f90
index 2f0162e..5fddf67 100644
--- a/multigrid/src/gvector_mod.f90
+++ b/multigrid/src/gvector_mod.f90
@@ -1,231 +1,231 @@
 !>
 !> @file gvector_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE gvector
 !
 !    Implementation of 2D vectors with arbitrary
 !    vector bounds and ghost cell width.
 !
 !  T.M. Tran, CRPP-EPFL
 !  September 2013
 !
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
   PRIVATE
   PUBLIC :: gvector_2d, disp, norm2, &
        &    OPERATOR(+), OPERATOR(-), OPERATOR(*), &
        &    ASSIGNMENT(=)
 
   TYPE gvector_2d
      INTEGER, DIMENSION(2)    :: s, e ! vector internal bounds
      INTEGER, DIMENSION(2)    :: g    ! ghost cell widths
      REAL(rkind), ALLOCATABLE :: val(:,:)
   END TYPE gvector_2d
 
   INTERFACE gvector_2d
      MODULE PROCEDURE constructor
   END INTERFACE gvector_2d
   INTERFACE OPERATOR(+)
      MODULE PROCEDURE add_scal
      MODULE PROCEDURE add_vec
   END INTERFACE OPERATOR(+)
   INTERFACE OPERATOR(-)
      MODULE PROCEDURE minus_vec
      MODULE PROCEDURE substract_vec
   END INTERFACE OPERATOR(-)
   INTERFACE OPERATOR(*)
      MODULE PROCEDURE scale_left
      MODULE PROCEDURE scale_right
   END INTERFACE OPERATOR(*)
   INTERFACE ASSIGNMENT(=)
      MODULE PROCEDURE from_scal
      MODULE PROCEDURE from_vec
   END INTERFACE ASSIGNMENT(=)
   INTERFACE norm2
      MODULE PROCEDURE norm2_gvector_2d
      MODULE PROCEDURE norm2_root_g_2d
      MODULE PROCEDURE norm2_all_g_2d
   END INTERFACE norm2
 
 CONTAINS
 !=======================================================================
   FUNCTION constructor(s, e, g) RESULT(res)
     INTEGER, INTENT(in)           :: s(2), e(2)
     INTEGER, OPTIONAL, INTENT(in) :: g(2)
     TYPE(gvector_2d)              :: res
     INTEGER :: lb(2), ub(2)
     res%g= 0
     IF(PRESENT(g)) res%g = g
     res%s = s
     res%e = e
     lb = res%s - res%g
     ub = res%e + res%g
     ALLOCATE(res%val(lb(1):ub(1),lb(2):ub(2)))
 !
 ! Initialize to 0 on all ghost cells
     res%val(lb(1):s(1)-1,:) = 0._rkind
     res%val(e(1)+1:ub(1),:) = 0._rkind
     res%val(:,lb(2):s(2)-1) = 0._rkind
     res%val(:,e(2)+1:ub(2)) = 0._rkind
   END FUNCTION constructor
 !=======================================================================
   FUNCTION add_vec(lhs, rhs) RESULT(res)
     TYPE(gvector_2d), INTENT(in) :: lhs, rhs
     TYPE(gvector_2d)             :: res
     res = gvector_2d(lhs%s, lhs%e, lhs%g)
     res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
          &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + &
          &  rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
   END FUNCTION add_vec
 !=======================================================================
   FUNCTION add_scal(lhs, rhs) RESULT(res)
     TYPE(gvector_2d), INTENT(in) :: lhs
     REAL(rkind), INTENT(in)      :: rhs
     TYPE(gvector_2d)             :: res
     res = gvector_2d(lhs%s, lhs%e, lhs%g)
     res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
          &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + rhs
   END FUNCTION add_scal
 !=======================================================================
   FUNCTION minus_vec(this) RESULT(res)
     TYPE(gvector_2d), INTENT(in) :: this
     TYPE(gvector_2d)             :: res
     res = gvector_2d(this%s, this%e, this%g)
     res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
          &  -this%val(res%s(1):res%e(1),res%s(2):res%e(2))
   END FUNCTION minus_vec
 !=======================================================================
   FUNCTION substract_vec(lhs, rhs) RESULT(res)
     TYPE(gvector_2d), INTENT(in) :: lhs, rhs
     TYPE(gvector_2d)             :: res
     res = gvector_2d(lhs%s, lhs%e, lhs%g)
     res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
          &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) - &
          &  rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
   END FUNCTION substract_vec
 !=======================================================================
   FUNCTION scale_left(lhs, rhs) RESULT(res)
     REAL(rkind), INTENT(in)      :: lhs
     TYPE(gvector_2d), INTENT(in) :: rhs
     TYPE(gvector_2d)             :: res
     res = gvector_2d(rhs%s, rhs%e, rhs%g)
     res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
          &  lhs * rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
   END FUNCTION scale_left
 !=======================================================================
   FUNCTION scale_right(lhs, rhs) RESULT(res)
     TYPE(gvector_2d), INTENT(in) :: lhs
     REAL(rkind), INTENT(in)      :: rhs
     TYPE(gvector_2d)             :: res
     res = gvector_2d(lhs%s, lhs%e, lhs%g)
     res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
          &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) * rhs
   END FUNCTION scale_right
 !=======================================================================
 
   SUBROUTINE from_vec(lhs, rhs)
     TYPE(gvector_2d), INTENT(inout) :: lhs
     REAL(rkind), INTENT(in)         :: rhs(:,:)
     INTEGER :: n(2)
     n = lhs%e - lhs%s + 1
     IF(SIZE(rhs,1).NE.n(1) .OR. SIZE(rhs,2).NE.n(2)) THEN
        PRINT*, 'from_vec: sizes of rhs and lhs not equal!'
        STOP
     END IF
     lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs(:,:)
   END SUBROUTINE from_vec
 !=======================================================================
 
   SUBROUTINE from_scal(lhs, rhs)
     TYPE(gvector_2d), INTENT(inout) :: lhs
     REAL(rkind), INTENT(in)         :: rhs
     lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs
   END SUBROUTINE from_scal
 !=======================================================================
   SUBROUTINE disp(str,this)
     CHARACTER(len=*), INTENT(in) :: str
     TYPE(gvector_2d), INTENT(in) :: this
     INTEGER :: i
     WRITE(*,'(/a,3(" (",i0,",",i0,") "))') str//': s, e, g =',&
          &   this%s, this%e, this%g
     DO i=LBOUND(this%val,1),UBOUND(this%val,1)
        WRITE(*,'(10(1pe11.3))') (this%val(i,:))
     END DO
   END SUBROUTINE disp
 !=======================================================================
 
   FUNCTION norm2_gvector_2d(this) RESULT(res)
     TYPE(gvector_2d), INTENT(in) :: this
     REAL(rkind)                  :: res
     res = NORM2( this%val(this%s(1):this%e(1), &
          &  this%s(2):this%e(2)) )
   END FUNCTION norm2_gvector_2d
 !=======================================================================
   FUNCTION norm2_root_g_2d(x, comm, root) RESULT(res)
 !
 !  Vector norm of 2d distributed array with ghost cells
 !
     USE mpi
     TYPE(gvector_2d), INTENT(in) :: x
     INTEGER, INTENT(in)          :: comm
     INTEGER, INTENT(in)          :: root
     REAL(rkind)                  :: res
     INTEGER, PARAMETER       :: ndim=2
     INTEGER, DIMENSION(ndim) :: s, e
     REAL(rkind)              :: res_loc
     INTEGER                  :: me, ierr
 !
     CALL mpi_comm_rank(comm, me, ierr)
     s = x%s
     e = x%e
     res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2)
     res = 0.0
     CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM,&
          &          root, comm, ierr)
     IF(me.EQ.root) res = SQRT(res)    
   END FUNCTION norm2_root_g_2d
 !=======================================================================
   FUNCTION norm2_all_g_2d(x, comm) RESULT(res)
 !
 !  Vector norm of 2d distributed array with ghost cells
 !
     USE mpi
     TYPE(gvector_2d), INTENT(in) :: x
     INTEGER, INTENT(in)          :: comm
     REAL(rkind)                  :: res
     INTEGER, PARAMETER       :: ndim=2
     INTEGER, DIMENSION(ndim) :: s, e
     REAL(rkind)              :: res_loc
     INTEGER                  :: ierr
 !
     s = x%s
     e = x%e
     res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2)
     CALL mpi_allreduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
          &          comm, ierr)
     res = SQRT(res)    
   END FUNCTION norm2_all_g_2d
 !=======================================================================
 END MODULE gvector
diff --git a/multigrid/src/parmg_mod.f90 b/multigrid/src/parmg_mod.f90
index 5de78ca..25ebc52 100644
--- a/multigrid/src/parmg_mod.f90
+++ b/multigrid/src/parmg_mod.f90
@@ -1,722 +1,722 @@
 !>
 !> @file parmg_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE parmg
 !
 !   parmg: Utilities for parallel multigrid
 !
 !   T.M. Tran, CRPP-EPFL
 !   December 2013
 !
   USE mpi
   USE iso_fortran_env, ONLY : rkind => real64
   USE gvector, ONLY : gvector_2d
   USE stencil, ONLY : stencil_2d
   IMPLICIT NONE
 !
   PRIVATE
   PUBLIC :: grid2_type, mg_info, create_grid, mg, get_lmax, coarse, disp, exchange, &
        &    get_resids, jacobi, prolong, init_restrict, restrict, &
        &    norm_vec, norm_mat
 !
   TYPE grid2_type
      INTEGER, DIMENSION(2)    :: s, e, s0, e0, npt_loc, npt
      REAL(rkind), ALLOCATABLE :: x(:)
      REAL(rkind), ALLOCATABLE :: y(:)
      TYPE(gvector_2d)         :: f
      TYPE(gvector_2d)         :: v
      TYPE(stencil_2d)         :: fdmat
      TYPE(stencil_2d)         :: restrict_mat
   END TYPE grid2_type
 !
   TYPE mg_info
      INTEGER :: comm   ! Communicator
      INTEGER :: nu1    ! Relaxation down sweeps
      INTEGER :: nu2    ! Relaxation up sweeps
      INTEGER :: mu     ! mu-cycle number
      INTEGER :: nu0    ! Number of FMG cycles
      INTEGER :: levels ! Number of mg levels
      INTEGER :: direct_solve_nits ! Jacobit nits for direct_solve
      CHARACTER(len=4) :: relax    ! Type of relation
      REAL(rkind)      :: omega    ! for weighted Jacobi relaxation
   END TYPE mg_info
 !
   INTERFACE create_grid
      MODULE PROCEDURE create_grid_2d
   END INTERFACE create_grid
   INTERFACE mg
      MODULE PROCEDURE mg_2d
   END INTERFACE mg
   INTERFACE coarse
      MODULE PROCEDURE coarse_1d, coarse_2d
   END INTERFACE coarse
   INTERFACE exchange
      MODULE PROCEDURE exchange_g_2d, exchange_g_2d_new
   END INTERFACE exchange
   INTERFACE prolong
      MODULE PROCEDURE prolong_g_2d
   END INTERFACE prolong
   INTERFACE restrict
      MODULE PROCEDURE restrict_g_2d
   END INTERFACE restrict
   INTERFACE jacobi
      MODULE PROCEDURE jacobi_stencila_2d
      MODULE PROCEDURE jacobi_stencilg_2d
   END INTERFACE jacobi
   INTERFACE get_resids
      MODULE PROCEDURE resids_stencila_2d
      MODULE PROCEDURE resids_stencilg_2d
   END INTERFACE get_resids
   INTERFACE disp
      MODULE PROCEDURE dispi_0, dispi_1
   END INTERFACE disp
 CONTAINS
 !
 !--------------------------------------------------------------------------------
   SUBROUTINE create_grid_2d(x, y, s_in, e_in, id, prb, grids, comm)
 !
 !    Create arrays of partitionned grids
 !
     USE stencil, ONLY : init
 !
     REAL(rkind), INTENT(in)      :: x(0:), y(0:)     ! Global coordinates
     INTEGER, INTENT(in)          :: s_in(2), e_in(2) ! Partition of finest grid
     INTEGER, INTENT(in)          :: id(:,:)          ! Structure of stencil
     CHARACTER(len=*), INTENT(in) :: prb
     INTEGER, INTENT(in)          :: comm
     TYPE(grid2_type)             :: grids(:)
 !
     INTEGER :: levels
     INTEGER :: s0(2), e0(2), s(2), e(2)
     INTEGER :: npt_loc(2), npt_loc_min(2), npt_glob(2)
     INTEGER :: l, ierr
 !
     levels = SIZE(grids)
     s = s_in
     e = e_in
 !
     DO l=1,levels
        IF(l.GT.1) THEN
           CALL coarse(s,e)
        END IF
        npt_loc = e-s+1
        CALL mpi_allreduce(s, s0, 2, MPI_INTEGER, MPI_MIN, comm, ierr)
        CALL mpi_allreduce(e, e0, 2, MPI_INTEGER, MPI_MAX, comm, ierr)
        CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, comm, ierr)
        IF(MINVAL(npt_loc_min) .LT. 2) THEN
           PRINT*, 'CREATE_GRID: number intervals too small!'
           STOP
        END IF
        npt_glob = e0+1
        grids(l)%s0 = s0
        grids(l)%e0 = e0
        grids(l)%s  = s
        grids(l)%e  = e
        grids(l)%npt_loc = npt_loc
        grids(l)%npt     = npt_glob
        grids(l)%f = gvector_2d(s, e, [1,1]) ! Arrays with ghost cell
        grids(l)%v = gvector_2d(s, e, [1,1])
        ALLOCATE(grids(l)%x(s0(1):e0(1)))    ! Global coords (x,y)
        ALLOCATE(grids(l)%y(s0(2):e0(2)))
        IF(l.EQ.1) THEN
           grids(1)%x = x
           grids(1)%y = y
        ELSE
           grids(l)%x(:) = grids(l-1)%x(0::2)
           grids(l)%y(:) = grids(l-1)%y(0::2)
        END IF
     END DO
 !
 !   Set up FD matrix
 !
     DO l=1,levels
        s = grids(l)%s
        e = grids(l)%e
        CALL init(s, e, id, .FALSE., grids(l)%fdmat, comm)
     END DO
 !
 !   Set up restriction stencil
 !
   DO l=2,levels
      CALL init_restrict(grids(l), prb, comm)
   END DO
 !
   END SUBROUTINE create_grid_2d
 !--------------------------------------------------------------------------------
   RECURSIVE SUBROUTINE mg_2d(grids, info, l)
 !
 !   Execute a recursive V-cycle
 !
     USE gvector, ONLY : ASSIGNMENT(=), OPERATOR(+)
     TYPE(grid2_type), INTENT(inout)  :: grids(:)
     TYPE(mg_info), INTENT(in)        :: info
     INTEGER, INTENT(in)              :: l
 !
     TYPE(gvector_2d)      :: resids, v_prolong
     INTEGER, DIMENSION(2) :: s0, e0, s, e, g=[1,1]
     INTEGER :: comm, levels, k
 !
     comm   = info%comm
     levels = info%levels
 !
     s0 = grids(l)%s0; e0 = grids(l)%e0
     s  = grids(l)%s;  e  = grids(l)%e
     resids = gvector_2d(s, e, g)
 !
     IF(l.EQ.levels) THEN
        CALL direct_solve(grids(l)%fdmat, grids(l)%v, grids(l)%f)
     ELSE
        CALL relax(info%nu1)
        resids = get_resids(comm, grids(l)%fdmat, grids(l)%v, grids(l)%f)
        CALL exchange(comm, resids)
        CALL restrict(grids(l+1)%restrict_mat, resids, grids(l+1)%f)
        grids(l+1)%v = 0.0d0
 !
 !   Only 1 call to the coarsest level
        DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
           CALL mg(grids, info, l+1)
        END DO
 !
        v_prolong = gvector_2d(s, e, g)
        CALL exchange(comm, grids(l+1)%v)
        CALL prolong(grids(l+1)%v, v_prolong)
        grids(l)%v = grids(l)%v + v_prolong
        CALL relax(info%nu2)
     END IF
 !
   CONTAINS
     SUBROUTINE relax(nu)
       INTEGER, INTENT(in) :: nu
       SELECT CASE (TRIM(info%relax))
       CASE ("jac")
          CALL jacobi(comm, grids(l)%fdmat, info%omega, nu, grids(l)%v, grids(l)%f)
       CASE default
          PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
          STOP
       END SELECT
     END SUBROUTINE relax
     SUBROUTINE direct_solve(mat, v, f)
       TYPE(stencil_2d), INTENT(in)    :: mat
       TYPE(gvector_2d), INTENT(inout) :: v
       TYPE(gvector_2d), INTENT(in)    :: f
       v = 0.0d0
       CALL jacobi(comm, mat, 1.0_rkind, info%direct_solve_nits, v, f)
     END SUBROUTINE direct_solve
   END SUBROUTINE mg_2d
 !--------------------------------------------------------------------------------
   FUNCTION get_lmax(s_in, npt_loc, npt_min, comm) RESULT(lmax)
 !
 !   Get max number of levels on all processes
 !
     INTEGER             :: lmax
     INTEGER, INTENT(in) :: s_in, npt_loc, npt_min, comm
     INTEGER :: me, ierr
     INTEGER :: s, e, kpt_loc, kpt, kpt_loc_min
 !    
     CALL mpi_comm_rank(comm, me, ierr)
     s = s_in
     kpt_loc = npt_loc
     e = s+npt_loc-1
     lmax = 1
     DO
        CALL mpi_allreduce(kpt_loc, kpt_loc_min, 1, MPI_INTEGER, MPI_MIN, &
             &             comm, ierr)
        CALL mpi_allreduce(kpt_loc, kpt, 1, MPI_INTEGER, MPI_SUM, &
             &             comm, ierr)
 !
 !   Stop if npt-1 not even or when minumum local npt is attained
        IF(MODULO(kpt-1,2).NE.0 .OR. kpt_loc_min .LE. npt_min) EXIT
 !
        lmax = lmax+1
        CALL coarse(s, e)
        kpt_loc = e-s+1
     END DO
 !
   END FUNCTION get_lmax
 !--------------------------------------------------------------------------------
   SUBROUTINE coarse_1d(s, e)
 !
 !  Compute (s,e) of next coarse grid
 !
     INTEGER, INTENT(inout)  :: s, e
     INTEGER :: s0, npt, i
 !
 !   Previous odd indices are discarded
     s0 = s
     IF( MODULO(s0,2) .NE. 0 ) THEN 
        s0 = s+1  
     END IF
 !
 !   Count local number of points
     npt = 0
     DO i=s0,e,2
        npt = npt+1
     END DO
 !
 !   Coarse s, e
     s = s0/2
     e = s + npt - 1
   END SUBROUTINE coarse_1d
 !--------------------------------------------------------------------------------
   SUBROUTINE coarse_2d(s, e)
 !
 !  Compute (s,e) of next coarse grid
 !
     INTEGER, INTENT(inout)  :: s(2), e(2)
 !
     CALL coarse_1d(s(1), e(1))
     CALL coarse_1d(s(2), e(2))
   END SUBROUTINE coarse_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE dispi_0(str, a, comm)
 !
 !   Display integer local scalar
 !
     INTEGER, INTENT(in)          :: a, comm
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER :: npes, me, ierr
     INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:)
 !
     CALL MPI_COMM_RANK(comm, me, ierr)
     CALL MPI_COMM_SIZE(comm, npes, ierr)
     ALLOCATE(a_gather(npes))
     CALL MPI_GATHER(a, 1, MPI_INTEGER, a_gather, 1, MPI_INTEGER, &
          &          0, comm, ierr)    
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a/(20i6))') str, a_gather
     END IF
     DEALLOCATE(a_gather)
   END SUBROUTINE dispi_0
 !--------------------------------------------------------------------------------
   SUBROUTINE dispi_1(str, a, comm)
 !
 !   Display integer local array
 !
     INTEGER, INTENT(in)          :: a(:), comm
     CHARACTER(len=*), INTENT(in) :: str
     INTEGER :: npes, me, ierr, n, i
     INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:,:)
 !
     n = SIZE(a,1)
     CALL MPI_COMM_RANK(comm, me, ierr)
     CALL MPI_COMM_SIZE(comm, npes, ierr)
     ALLOCATE(a_gather(n,npes))
     CALL MPI_GATHER(a, n, MPI_INTEGER, a_gather, n, MPI_INTEGER, &
          &          0, comm, ierr)    
     IF( me.EQ.0 ) THEN
        WRITE(*,'(a)') str
        DO i=1,n
           WRITE(*,'(20i6)') a_gather(i,:)
        END DO
     END IF
     DEALLOCATE(a_gather)
   END SUBROUTINE dispi_1
 !--------------------------------------------------------------------------------
   SUBROUTINE exchange_g_2d_new(comm, u)
 !
 !  Exchange ghost cells with (west,east,south,north) neighbors.
 !  Assume same ghost cells on each dimension:
 !    u%g(1) : number of ghost cells on west and east boundaries
 !    u%g(2) : number of ghost cells on south and north boundaries
 !
     INTEGER, INTENT(in)             :: comm
     TYPE(gvector_2d), INTENT(inout) :: u
     INTEGER  :: neighs(4), ierr
 !
     CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr)
     CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr)
     CALL exchange_g_2d(comm, neighs, u)
   END SUBROUTINE exchange_g_2d_new
 !--------------------------------------------------------------------------------
   SUBROUTINE exchange_g_2d(comm, neighs, u)
 !
 !  Exchange ghost cells with (west,east,south,north) neighbors.
 !  Assume same ghost cells on each dimension:
 !    u%g(1) : number of ghost cells on west and east boundaries
 !    u%g(2) : number of ghost cells on south and north boundaries
 !
     INTEGER, INTENT(in)             :: comm
     INTEGER, INTENT(in)             :: neighs(4)
     TYPE(gvector_2d), INTENT(inout) :: u
 !
     INTEGER                  :: cols, rows
     INTEGER                  :: ierr
     INTEGER, PARAMETER       :: ndim=2
     INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n
 !
     s = u%s
     e = u%e
     g = u%g
     lb = s - g
     ub = e + g
     n = ub - lb + 1   ! include ghost cells
 !
 !   g(2) matrix full rows with stride n(1)
     CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr)
     CALL mpi_type_commit(rows, ierr)
 !
 !   g(1) contiguous matrix full columns
     CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION,  cols, ierr)
     CALL mpi_type_commit(cols, ierr)
 !
 !  Exchange along first dimension
     CALL mpi_sendrecv(u%val(s(1),  lb(2)), 1, rows, neighs(1), 0, &
          &            u%val(e(1)+1,lb(2)), 1, rows, neighs(2), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
     CALL mpi_sendrecv(u%val(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, &
          &            u%val(lb(1),      lb(2)), 1, rows, neighs(1), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
 !
 !  Exchange along second dimension
     CALL mpi_sendrecv(u%val(lb(1),s(2)),   1, cols, neighs(3), 0, &
          &            u%val(lb(1),e(2)+1), 1, cols, neighs(4), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
     CALL mpi_sendrecv(u%val(lb(1),e(2)-g(2)+1),   1, cols, neighs(4), 0, &
          &            u%val(lb(1),lb(2)),         1, cols, neighs(3), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
   END SUBROUTINE exchange_g_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE prolong_g_2d(vbar, v)
 !
 !  2D bilinear prolongation
 !
     TYPE(gvector_2d), INTENT(in)    :: vbar
     TYPE(gvector_2d), INTENT(inout) :: v
 !
     INTEGER :: i,j,i1,i2,j1,j2
 !
     i1 = v%s(1)-MODULO(v%s(1),2); i2 = v%e(1)+MODULO(v%e(1),2)
     j1 = v%s(2)-MODULO(v%s(2),2); j2 = v%e(2)+MODULO(v%e(2),2)
 !
 !    Even numbered nodes on fine mesh
 !
     DO j=j1,j2,2
        DO i=i1,i2,2
           v%val(i,j) = vbar%val(i/2,j/2)
        END DO
     END DO
 !
 !   Linear interpolation on x
 !
     DO j=j1,j2,2
        DO i=i1+1,i2-1,2
           v%val(i,j) = 0.5d0*(v%val(i-1,j)+v%val(i+1,j))
        END DO
     END DO
 !
 !   Linear interpolation on y
 !
     DO j=j1+1,j2-1,2
        DO i=i1,i2
           v%val(i,j) = 0.5d0*(v%val(i,j-1)+v%val(i,j+1))
        END DO
     END DO
   END SUBROUTINE prolong_g_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE init_restrict(grid, prb, comm)
 !
 !  Set up restriction stencil
 !
     USE stencil, ONLY : init
     TYPE(grid2_type), INTENT(inout) :: grid
     CHARACTER(len=*), INTENT(in)    :: prb
     INTEGER, INTENT(in)             :: comm
 !
     INTEGER, PARAMETER :: npoints=9, ndim=2
     INTEGER :: s(2), e(2), n(2), id(9,2)
     INTEGER :: i, j
 !
 ! Stencil structure initialization
 !
     s = grid%s
     e = grid%e
     n = grid%npt-1
 !
 !           N
 !       6---7---8
 !       |   |   |
 !    W  4---0---5   E   Numbering of stencil
 !       |   |   |
 !       1---2---3
 !           S
 !
     id = RESHAPE([0, -1, 0, 1,-1, 1,-1, 0, 1,  &
          &        0, -1,-1,-1, 0, 0, 1, 1, 1], &
          &       [npoints, ndim])
     CALL init(s, e, id, .FALSE., grid%restrict_mat, comm)
 !
 ! Fill in stencil
 !
     DO j=s(2),e(2)
        DO i=s(1),e(1)
           grid%restrict_mat%val(i,j,:) = [4._rkind, &
          &        1._rkind, 2._rkind, 1._rkind, &
          &        2._rkind,           2._rkind, &
          &        1._rkind, 2._rkind, 1._rkind ]&
          &            / 16._rkind
        END DO
     END DO
 !
 ! Apply Dirichlet BC
 !
     IF(s(1).EQ.0 .AND. prb(1:1).EQ.'d') THEN       ! West face
        grid%restrict_mat%val(s(1),:,3) = 0._rkind
        grid%restrict_mat%val(s(1),:,5) = 0._rkind
        grid%restrict_mat%val(s(1),:,8) = 0._rkind
     END IF
     IF(e(1).EQ.n(1) .AND. prb(2:2).EQ.'d') THEN    ! East face
        grid%restrict_mat%val(e(1),:,1) = 0._rkind
        grid%restrict_mat%val(e(1),:,4) = 0._rkind
        grid%restrict_mat%val(e(1),:,6) = 0._rkind
     END IF
     IF(s(2).EQ.0 .AND. prb(3:3).EQ.'d') THEN       ! South face
        grid%restrict_mat%val(:,s(2),6) = 0._rkind
        grid%restrict_mat%val(:,s(2),7) = 0._rkind
        grid%restrict_mat%val(:,s(2),8) = 0._rkind
     END IF
     IF(e(2).EQ.n(2) .AND. prb(4:4).EQ.'d') THEN    ! North face
        grid%restrict_mat%val(:,e(2),1) = 0._rkind
        grid%restrict_mat%val(:,e(2),2) = 0._rkind
        grid%restrict_mat%val(:,e(2),3) = 0._rkind
     END IF
   END SUBROUTINE init_restrict
 !--------------------------------------------------------------------------------
   SUBROUTINE jacobi_stencila_2d(mat, omega, nu, v, f)
 !
 !   Weighted Jacobi relaxation
 !
     TYPE(stencil_2d),INTENT(in)             :: mat
     REAL(rkind), INTENT(in)                 :: omega
     INTEGER, INTENT(in)                     :: nu
     REAL(rkind), ALLOCATABLE, INTENT(inout) :: v(:,:)
     REAL(rkind), ALLOCATABLE, INTENT(in)    :: f(:,:)
 !
     REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:)
     INTEGER, DIMENSION(2)    :: smin, emax, s, e, d, lb, ub
     INTEGER                  :: it, k, i, j
 !
     s(:) = mat%s(:)
     e(:) = mat%e(:)
     smin(:) = mat%s0(:)
     emax(:) = mat%e0(:)
 !
     ALLOCATE(temp(s(1):e(1),s(2):e(2)))
     ALLOCATE(inv_diag(s(1):e(1),s(2):e(2)))
 !
     inv_diag(:,:) = omega/mat%val(:,:,0)
     DO it=1,nu
        temp(:,:) = f(s(1):e(1),s(2):e(2))
        DO k=1,mat%npoints-1   ! exclude the diagonal term, f - (L+U)*v
           d(:) = mat%id(k,:)
           lb = MAX(smin, smin-d, mat%s)
           ub = MIN(emax, emax-d, mat%e)
           DO j=lb(2),ub(2)
              DO i=lb(1),ub(1)
                 temp(i,j) = temp(i,j) - mat%val(i,j,k)*v(i+d(1),j+d(2))
              END DO
           END DO
        END DO
        temp = temp *  inv_diag
        v(s(1):e(1),s(2):e(2)) = (1.d0-omega)*v(s(1):e(1),s(2):e(2)) + temp
     END DO
 !
     DEALLOCATE(temp)
     DEALLOCATE(inv_diag)
   END SUBROUTINE jacobi_stencila_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE jacobi_stencilg_2d(comm, mat, omega, nu, v, f)
 !
 !   Weighted Jacobi relaxation
 !
     USE gvector, ONLY : ASSIGNMENT(=)
     INTEGER, INTENT(in)             :: comm
     TYPE(stencil_2d),INTENT(in)     :: mat
     REAL(rkind), INTENT(in)         :: omega
     INTEGER, INTENT(in)             :: nu
     TYPE(gvector_2d), INTENT(inout) :: v
     TYPE(gvector_2d), INTENT(in)    :: f
 !
     REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:)
     INTEGER, DIMENSION(2) :: s, e, d
     INTEGER               :: it, k, i, j
 !
     s(:) = v%s(:)
     e(:) = v%e(:)
 !
     ALLOCATE(temp(s(1):e(1),s(2):e(2)))
     ALLOCATE(inv_diag(s(1):e(1),s(2):e(2)))
 !
     inv_diag(:,:) = omega/mat%val(:,:,0)
     DO it=1,nu
        CALL exchange(comm, v)
        temp(:,:) = f%val(s(1):e(1),s(2):e(2))
        DO k=1,mat%npoints-1   ! exclude the diagonal term, f - (L+U)*v
           d(:) = mat%id(k,:)
           DO j=s(2),e(2)
              DO i=s(1),e(1)
                 temp(i,j) = temp(i,j) - &
                      &   mat%val(i,j,k) * v%val(i+d(1),j+d(2))
              END DO
           END DO
        END DO
        temp = temp *  inv_diag
        v%val(s(1):e(1),s(2):e(2)) = (1.d0-omega) * v%val(s(1):e(1),s(2):e(2)) + &
             &                       temp
     END DO
   END SUBROUTINE jacobi_stencilg_2d
 !--------------------------------------------------------------------------------
   FUNCTION resids_stencila_2d(mat, xarr, farr) RESULT(res)
 !
 !   Return residuals res = mat*x, where x, farr and res are simple arrays
 !
     TYPE(stencil_2d), INTENT(in)         :: mat
     REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:)
     REAL(rkind), ALLOCATABLE, INTENT(in) :: farr(:,:)
     REAL(rkind)           :: res(LBOUND(xarr,1):UBOUND(xarr,1), &
          &                   LBOUND(xarr,2):UBOUND(xarr,2))
     INTEGER               :: k, i, j
     INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub
 !
     smin(:) = mat%s0(:)
     emax(:) = mat%e0(:)
     res = farr
     DO k=0,mat%npoints-1
        d(:) = mat%id(k,:)
        lb = MAX(smin, smin-d, mat%s)
        ub = MIN(emax, emax-d, mat%e)
        DO j=lb(2),ub(2)
           DO i=lb(1),ub(1)
              res(i,j) = res(i,j) - mat%val(i,j,k)*xarr(i+d(1),j+d(2))
           END DO
        END DO
     END DO
   END FUNCTION resids_stencila_2d
 !--------------------------------------------------------------------------------
   FUNCTION resids_stencilg_2d(comm, mat, xarr, farr) RESULT(res)
 !
 !   Return residuals res= f-mat*x, where x, f and res are gvectors
 !
     INTEGER, INTENT(in)             :: comm
     TYPE(stencil_2d), INTENT(in)    :: mat
     TYPE(gvector_2d), INTENT(inout) :: xarr
     TYPE(gvector_2d), INTENT(in)    :: farr
     TYPE(gvector_2d)                :: res
     INTEGER               :: k, i, j
     INTEGER, DIMENSION(2) :: s, e, d
 !
     s = xarr%s
     e = xarr%e
     res = gvector_2d(xarr%s, xarr%e, xarr%g)
     res%val = farr%val
     CALL exchange(comm, xarr)
     DO k=0,mat%npoints-1
        d(:) = mat%id(k,:)
        DO j=s(2),e(2)
           DO i=s(1),e(1)
              res%val(i,j) = res%val(i,j) - mat%val(i,j,k)*xarr%val(i+d(1),j+d(2))
           END DO
        END DO
     END DO
   END FUNCTION resids_stencilg_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE restrict_g_2d(mat, f, fbar)
 !
 !   2D full weighting restriction
 !
     TYPE(stencil_2d), INTENT(in)    :: mat
     TYPE(gvector_2d), INTENT(in)    :: f
     TYPE(gvector_2d), INTENT(inout) :: fbar
 !
     INTEGER, DIMENSION(2) :: s, e, d
     INTEGER               :: k, i, j
 !
     s = fbar%s
     e = fbar%e
 !
 ! Diagonal contributions: d(0) = (0,0)
     DO j=s(2),e(2)
        DO i=s(1),e(1)
           fbar%val(i,j) = mat%val(i,j,0) * f%val(2*i,2*j)
        END DO
     END DO
 !
     DO k=1,mat%npoints-1
        d(:) = mat%id(k,:)
        DO j=s(2),e(2)
           DO i=s(1),e(1)
              fbar%val(i,j) = fbar%val(i,j) + &
                &   mat%val(i,j,k) * f%val(2*i+d(1),2*j+d(2))
           END DO
        END DO
     END DO
    
   END SUBROUTINE restrict_g_2d
 !--------------------------------------------------------------------------------
   REAL(rkind) FUNCTION norm_vec(x, comm, root)
 !
 !  Infinity vector norm
 !
     TYPE(gvector_2d), INTENT(in)  :: x
     INTEGER, INTENT(in)           :: comm
     INTEGER, OPTIONAL, intent(in) :: root
     REAL(rkind) :: temp
     INTEGER :: ierr
     temp = MAXVAL( ABS(x%val(x%s(1):x%e(1),x%s(2):x%e(2))) )
     IF(PRESENT(root)) THEN
        CALL mpi_reduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
             &          root, comm, ierr)
     ELSE
        CALL mpi_allreduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
             &             comm, ierr)
     END IF
   END FUNCTION norm_vec
 !--------------------------------------------------------------------------------
   REAL(rkind) FUNCTION norm_mat(mat, comm, root)
 !
 !  Infinity matrix norm
 !
     TYPE(stencil_2d), INTENT(in)  :: mat
     INTEGER, INTENT(in)           :: comm
     INTEGER, OPTIONAL, intent(in) :: root
     REAL(rkind) :: arr_temp(mat%s(1):mat%e(1),mat%s(2):mat%e(2))
     REAL(rkind) :: temp
     INTEGER :: i, j, s(2), e(2), ierr
     s = mat%s; e = mat%e
     DO j=s(2),e(2)
        DO i=s(1),e(1)
           arr_temp(i,j) = SUM(ABS(mat%val(i,j,:)))
        END DO
     END DO
     temp = MAXVAL(arr_temp)
     IF(PRESENT(root)) THEN
        CALL mpi_reduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
             &          root, comm, ierr)
     ELSE
        CALL mpi_allreduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
             &             comm, ierr)
     END IF
   END FUNCTION norm_mat
 !--------------------------------------------------------------------------------
 END MODULE parmg
diff --git a/multigrid/src/partition.f90 b/multigrid/src/partition.f90
index 1a3b2e6..27cd714 100644
--- a/multigrid/src/partition.f90
+++ b/multigrid/src/partition.f90
@@ -1,77 +1,77 @@
 !>
 !> @file partition.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
   USE mpi
   USE pputils2, ONLY : dist1d
   USE parmg,    ONLY : get_lmax, coarse, disp
   IMPLICIT NONE
 !
   INTEGER :: me, npes, ierr
   INTEGER :: n, npt, npt_loc, s, e
   INTEGER :: l, lmax
 !
   CALL MPI_INIT(ierr)
   CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
   CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
 !
   IF(me.EQ.0) THEN
      WRITE(*,*) 'Enter n'
      READ(*,*) n
   END IF
   CALL mpi_bcast(n,1,MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
 !
 !   Partition ot finest grid
 !
   npt = n+1
   CALL dist1d(MPI_COMM_WORLD, 0, npt, s, npt_loc)
   e = s+npt_loc-1
   CALL disp('start index=', s, MPI_COMM_WORLD)
 !
 !   Max number of levels
 !
   lmax = get_lmax(s, npt_loc, 1, MPI_COMM_WORLD)
   IF(me.EQ.0) WRITE(*,'(a,i0)') 'Max number of levels ', lmax
 !
 !   Grid coarsening
 !
   DO l=1,lmax
      IF(l.GT.1) THEN
         CALL coarse(s, e)
         npt_loc = e-s+1
         CALL mpi_allreduce(npt_loc, npt, 1, MPI_INTEGER, MPI_SUM, &
              &             MPI_COMM_WORLD, ierr)
      END IF
      IF(me.EQ.0) WRITE(*, '(a,i3)') 'level', l
      CALL disp('s      ', s, MPI_COMM_WORLD)
      CALL disp('e      ', e, MPI_COMM_WORLD)
      CALL disp('npt_loc', npt_loc, MPI_COMM_WORLD)
      CALL disp('npt    ', npt, MPI_COMM_WORLD)
   END DO
 !
   CALL MPI_FINALIZE(ierr)
 !
 !+++++
 END PROGRAM main
diff --git a/multigrid/src/poisson_fd.f90 b/multigrid/src/poisson_fd.f90
index d44100d..3710133 100644
--- a/multigrid/src/poisson_fd.f90
+++ b/multigrid/src/poisson_fd.f90
@@ -1,485 +1,485 @@
 !>
 !> @file poisson_fd.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 ! Solving the following 2d PDE using finite differences:
 !
 !   d2/dx2 f(x,y) + tau*d2/dxdy f(x,y) + d2/dy2 f(x,y) = w(x,y),  x,y in [0:Lx][0:Ly]
 !      w(x,y) = -4*pi^2 *[(kx^2/Lx^2+ky^2/Ly^2)*cos(2*kx*pi*x/Lx)*sin(2*ky*pi*y/Ly)
 !               -tau*(kx*ky)/(Lx*Ly)*sin(2*kx*pi*x/Lx)*cos(2*ky*pi*y/Lx)]
 !
 !   West, East boundaries:   Neumann
 !   South, North boundaries: Dirichlet
 !
 !   Analytic solution : f(x,y) = cos(2*kx*pi*x/Lx)*sin(2*kx*pi*y/Ly)
 !
   USE multigrid
   USE fdmat_mod
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   INTEGER, PARAMETER :: nnumx=32
 !
   INTEGER :: ierr, np, me
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   DOUBLE PRECISION :: Lx, Ly, icrosst, beta, miome
   INTEGER :: n, nx,ny,nz,kx,ky
   CHARACTER(len=4) :: prb
   INTEGER :: nits
   DOUBLE PRECISION :: atol, rtol
   LOGICAL :: nldirect, nldebug
 !
   TYPE(mg_info) :: info  ! info for MG
   INTEGER :: levels, nnu, mu, nu0
 !
   INTEGER  :: inu, nu1(nnumx), nu2(nnumx), niter(nnumx)
   DOUBLE PRECISION :: titer(nnumx)
 !
   LOGICAL :: nlfixed
   DOUBLE PRECISION :: omega
   CHARACTER(len=4) :: mat_type, relax
 !
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy, l, its
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), dense(:)
   TYPE(grid2d), ALLOCATABLE :: grids(:)
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_ana2d(:,:), sol_direct2d(:,:)
   DOUBLE PRECISION, POINTER             :: sol_ana(:), sol_direct(:)
   DOUBLE PRECISION :: err_direct, resid_direct
   DOUBLE PRECISION :: norma, normb
   DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
   DOUBLE PRECISION :: t0, tsetup, tmat(2), tdirect, tbsolve
   DOUBLE PRECISION, EXTERNAL ::  mem
 !
   NAMELIST /parameters/ prb, mat_type,nx, ny, nz, kx, ky, Lx, Ly, icrosst, beta, &
        &                miome, nldebug, nlfixed, levels, nnu, nu1, nu2, mu, nu0, &
        &                relax,omega, nldirect, nits, atol, rtol
 !--------------------------------------------------------------------------------
 !                       1.0 Prologue
 !
   CALL MPI_INIT(ierr)
   CALL MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr)
   CALL MPI_COMM_SIZE(MPI_COMM_WORLD,np,ierr)
 !
 !   Default inputs
 !
   nx=32
   ny=32
   nz=1
   kx=1
   ky=1
   icrosst=1.0d0
   Lx = 1.0D0
   Ly = 1.0D0
   beta = 0d0
   miome = 200d0
   nldebug = .FALSE.
   prb = 'dddd'
   mat_type = 'cds'
   nldirect = .TRUE.
 !
   nlfixed = .FALSE.
   levels = 2
   nnu = 1
   nu1 = 1
   nu2 = 1
   mu = 1
   nu0 = 1
   nits = 10
   atol = 1.e-8; rtol = 1.e-8
   relax = 'jac'
   omega = 0.6667
 !
   IF(me==0) THEN
      READ(*,parameters)
      WRITE(*,parameters)
   END IF
 !
 ! Send input parameters to other processors
 !
   CALL MPI_BCAST(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(nz, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(kx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(ky, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(icrosst, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(Lx,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(Ly,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(beta,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
   CALL MPI_BCAST(miome, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
 !
   CALL mpi_bcast(nldebug, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nldirect, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nnu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nu1, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nu2, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(mat_type, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(atol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
   CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
 !
   IF(nnu.GT.nnumx) THEN
      IF(me.EQ.0) THEN
         PRINT*, 'Value of nnu larger than', nnumx
      END IF
      CALL mpi_finalize(ierr)
      STOP
   END IF
 !
 !   Adjust number of levels and fill mg info.
 !
   levels = MIN(levels, get_lmax(nx), get_lmax(ny))
   info%nu1 = nu1(1)
   info%nu2 = nu2(1)
   info%mu = mu
   info%nu0 = nu0
   info%levels = levels
   info%relax = relax
   info%omega = omega
 !--------------------------------------------------------------------------------
 !                       2.0 Setup grids
 !
 !   Grid on the finest level
 !
   dx = lx/REAL(nx,8)
   dy = ly/REAL(ny,8)
   ALLOCATE(x(0:nx), y(0:ny))
   DO ix=0,nx
      x(ix) = ix*dx
   END DO
   DO iy=0,ny
      y(iy) = iy*dy
   END DO
   WRITE(*,'(a,3(1pe12.3))') 'dx, dy, dx/dy =', dx, dy, dx/dy
 !
   ALLOCATE(dense(0:nx))
   dense = fdense(x)
 !
 !   Set up grids
 !
   t0 = mpi_wtime()
   ALLOCATE(grids(levels))
   CALL create_grid_fd(x, y, grids, info, mat_type=mat_type, debug=nldebug)
   WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
   WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
   IF(nldebug) THEN
      CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1))
      CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2))
   END IF
 !
 !   Set BC on grid transfer matrices
 !
   IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X
   CALL ibc_transf(grids,2,3)                   ! Direction Y
   tsetup = mpi_wtime()-t0
 !--------------------------------------------------------------------------------
 !                       3.0 Problem discretization
 !
 !  Construct FD matrix and impose BC on all grids
 !
   t0=mpi_wtime()
   DO l=1,levels
      CALL fdmat(grids(l), fdense, icrosst)
      IF(mat_type.EQ.'csr') CALL to_mat(grids(l)%mata)
   END DO
   tmat(1) = mpi_wtime()-t0
 !
   t0=mpi_wtime()
   DO l=1,levels
      CALL ibc_fdmat(grids(l), prb)
   END DO
   tmat(2) = mpi_wtime()-t0
 !
 !   Set RHS and impose BC on the fiest grid
 !
   grids(1)%f(:,:) = frhs(x,y)
 !
   IF(prb.EQ.'dddd') THEN
      grids(1)%f(0,:) = 0.0d0   ! Dirichlet on west and east
      grids(1)%f(nx,:) = 0.0d0
   ELSE IF(prb.EQ.'nndd') THEN  ! Neumann on west and east
      grids(1)%f(0,:) = 0.5d0*grids(1)%f(0,:)
      grids(1)%f(nx,:) = 0.5d0*grids(1)%f(nx,:)
   END IF
   grids(1)%f(:,0) = 0.0d0    ! Dirichlet on south and north
   grids(1)%f(:,ny) = 0.0d0
 !
 !--------------------------------------------------------------------------------
 !                       4.0 Analytical solutions and RHS at the finest grid (l=1)
 !
   n = (nx+1)*(ny+1)              ! Number of unknowns
   ALLOCATE(sol_ana2d(0:nx,0:ny))
   sol_ana(1:n) => sol_ana2d
   sol_ana2d(:,:) = fsol(x,y)
 !--------------------------------------------------------------------------------
 !                       5.0 Direct solution at the finest grid (l=1)
 !
   IF(nldirect) THEN
      WRITE(*,'(/a)') 'Direct solution for the finest grid problem ...'
      ALLOCATE(sol_direct2d(0:nx,0:ny))
      sol_direct(1:n) => sol_direct2d
 !
      t0 = mpi_wtime()
      sol_direct = grids(1)%f1d
      CALL direct_solve(grids(1), sol_direct, debug=nldebug)
      tdirect = mpi_wtime()-t0
 !
      t0 = mpi_wtime()
      sol_direct = grids(1)%f1d
      CALL direct_solve(grids(1), sol_direct, debug=nldebug)
      tbsolve = mpi_wtime()-t0
 !
 !  Max norm and residual
 !
      err_direct = MAXVAL(ABS(sol_direct-sol_ana))
      resid_direct = residue(grids(1), grids(1)%f1d, sol_direct, 'inf')
      WRITE(*,'(a,2(1pe12.3))') 'Max norm of error and residual norm', &
           &                  err_direct, resid_direct
   END IF
 !--------------------------------------------------------------------------------
 !                       5.0 Iterative solution using MG V-cycle
 !
   WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
   ALLOCATE(rresid(0:nits))
 !
 !   Norm of A and b
 !
   IF(mat_type.EQ.'csr') THEN
      norma = matnorm(grids(1)%mata, 'inf')
   ELSE
      norma = matnorm(grids(1)%mata_cds, 'inf')
   END IF
   normb = MAXVAL(ABS(grids(1)%f1d))
   WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb
 !
 !   Initial guess
 !
   DO inu=1,nnu
      info%nu1 = nu1(inu)
      info%nu2 = nu2(inu)
      WRITE(*,'(/2(a5,i3,2x))') 'nu1 =', nu1(inu), 'nu2 =', nu2(inu)
      IF(nlfixed .AND. nldirect) THEN
         grids(1)%v = sol_direct2d
      ELSE
         grids(1)%v = 0.0d0
      END IF
 !
      errdisc(0) = MAXVAL(ABS(grids(1)%v1d-sol_ana))
      resid(0) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf')
      rresid(0) = resid(0)  /  ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb )
      WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
           &                           'rel. resid', 'ratio'
      WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0)
 !
 !   Iterations
 !
      t0 = mpi_wtime()
      DO its=1,nits
         CALL mg(grids, info, 1)
         errdisc(its) = MAXVAL(ABS(grids(1)%v1d-sol_ana))
         resid(its) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf')
         rresid(its) = resid(its)  /  ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb )
         WRITE(*,'((i4,3(1pe12.3,0pf8.2)))')  its, &
              &   resid(its),   resid(its)/resid(its-1), &
              &   errdisc(its), errdisc(its)/errdisc(its-1), &
              &   rresid(its),  rresid(its)/rresid(its-1)
         IF(resid(its) .LE. atol .or. rresid(its) .le. rtol) EXIT
      END DO
      niter(inu) = MIN(nits,its)
      titer(inu) = mpi_wtime() - t0
   END DO
 !--------------------------------------------------------------------------------
 !                       9.0 Epilogue
 !
 !   Display timing
 !
   WRITE(*,'(/a)') 'Timing ...'
   WRITE(*,'(a,1pe12.3,i5)') 'Setup time (s)             ', tsetup
   WRITE(*,'(a,2(1pe12.3))')  'Matrix construction time(s)', tmat
   WRITE(*,'(a,2(1pe12.3))') 'Direct and bsolve time (s) ', tdirect, tbsolve
   WRITE(*,'(/3a6,a15)') 'nu1', 'nu2', 'niter', 'Iter time(s)'
   DO inu=1,nnu
      WRITE(*,'(3i6,3x,1pe12.3)') nu1(inu), nu2(inu), niter(inu), titer(inu)
   END DO
 !
   WRITE(*,'(/a,f12.3)') 'Mem used so far (MB)', mem()
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
 !   Clean up
 !
   DEALLOCATE(x)
   DEALLOCATE(y)
   DEALLOCATE(dense)
   DEALLOCATE(grids)
   DEALLOCATE(sol_ana2d)
   IF(nldirect)  DEALLOCATE(sol_direct2d)
   DEALLOCATE(errdisc)
   DEALLOCATE(resid)
   DEALLOCATE(rresid)
 !
   CALL MPI_FINALIZE(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION fdense(x)
 !
 !   Return density
 !
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: fdense(SIZE(x))
     fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
   END FUNCTION fdense
 !+++
   FUNCTION frhs(x,y)
 !
 !   Return RHS
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c, s, d(SIZE(x))
     DOUBLE PRECISION :: corr
     INTEGER :: j
     corr = 1.d0+icrosst**2/4.0d0
     d(:) = fdense(x(:))
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
                &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
                &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     END IF
 !!$    frhs = -frhs
   END FUNCTION frhs
 !+++
   FUNCTION fsol(x,y)
 !
 !   Return analytical solution
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c
     INTEGER :: j
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     END IF
   END FUNCTION fsol
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='poisson_mg.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NY', ny)
     CALL attach(fid, '/', 'KX', kx)
     CALL attach(fid, '/', 'KY', ky)
     CALL attach(fid, '/', 'LX', Lx)
     CALL attach(fid, '/', 'LY', Ly)
     CALL attach(fid, '/', 'BETA', beta)
     CALL attach(fid, '/', 'OMEGA', omega)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'MAT_TYPE', mat_type)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'NNU', nnu)
     CALL attach(fid, '/', 'NU0', nu0)
     CALL attach(fid, '/', 'MU', mu)
 !
     CALL putarr(fid, '/nu1', nu1(1:nnu))
     CALL putarr(fid, '/nu2', nu2(1:nnu))
     CALL putarr(fid, '/dense', dense)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        IF(mat_type.EQ.'csr') THEN
           CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        ELSE
           CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata_cds)
        END IF
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/calc', grids(1)%v)
     CALL putarr(fid, '/solutions/anal', sol_ana2d)
     IF(nldirect)  CALL putarr(fid, '/solutions/direct', sol_direct2d)
 !
     nits=niter(nnu)
     CALL creatg(fid, '/Iterations')
     CALL putarr(fid, '/Iterations/residues', resid(0:nits))
     CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 
 END PROGRAM main
diff --git a/multigrid/src/ppoisson_fd.f90 b/multigrid/src/ppoisson_fd.f90
index 0fbf675..090a006 100644
--- a/multigrid/src/ppoisson_fd.f90
+++ b/multigrid/src/ppoisson_fd.f90
@@ -1,418 +1,418 @@
 !>
 !> @file ppoisson_fd.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !    Test 2D  parallel multigrid V-cycle
 !
 MODULE mod
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
 !
   REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
 CONTAINS
 END MODULE mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 PROGRAM main
   USE mpi
   USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs
   USE pputils2, ONLY  : dist1d, timera, hostlist
   USE gvector, ONLY   : gvector_2d, norm2, ASSIGNMENT(=), OPERATOR(-)
   USE parmg, ONLY     : grid2_type, mg_info, create_grid, mg, exchange, &
        &                get_resids, disp, norm_vec, norm_mat
   USE stencil, ONLY   : stencil_2d, putmat
   USE mod
   IMPLICIT NONE
 !
   INTEGER, PARAMETER :: ndims=2
 !
   INTEGER                   :: me, neighs(4), npes, ierr
   INTEGER, DIMENSION(ndims) :: coords, comm1d
   LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
   LOGICAL                   :: reorder =.FALSE.
   INTEGER                   :: comm_cart
   INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc
 !
   REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
   INTEGER, ALLOCATABLE      :: id(:,:)
   REAL(rkind)               :: dx, dy
   INTEGER                   :: npoints ! Number of points in FD stencil
   REAL(rkind)               :: t_mg, t_mg0, t_mg_min, t_mg_max
 !
   TYPE(gvector_2d)          :: v_exact, resids, errs
   REAL(rkind)               :: norma, normb, normv
   REAL(rkind), ALLOCATABLE  :: resid_it(:), err_it(:), rresid(:)
   REAL(rkind)               :: ratio_err, ratio_resid, ratio_rresid
   INTEGER, DIMENSION(ndims) :: g, npt_loc_min
   INTEGER                   :: l, i, it
   CHARACTER(len=64)         :: str
 !
   TYPE(grid2_type), ALLOCATABLE :: grids(:)
   TYPE(mg_info)                 :: info    ! info for MG
 !
 !   Input quantities
 !
   LOGICAL           :: nldebug=.FALSE.
   CHARACTER(len=64) :: filein = 'ppoisson_fd.in'
   INTEGER           :: dims(2)=[0,0]
   CHARACTER(len=4)  :: prb='dddd'
   CHARACTER(len=4)  :: relax='jac'
   INTEGER           :: nx=4, ny=4    ! Number of intervals
   INTEGER           :: kx=1, ky=1
   REAL(rkind)       :: Lx=1.0, Ly=1.0
   REAL(rkind)       :: icrosst=1.0, beta=0.0, miome=200.0
   REAL(rkind)       :: omega=1.0d0
   INTEGER           :: nits=100, direct_solve_nits=5
   INTEGER           :: levels=2, nu1=3, nu2=3, mu=1, nu0=1
   REAL(rkind)       :: rtol=1.e-8, atol=1.e-8, errtol=1.e-3
 !
   NAMELIST /in/ nldebug, dims, prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, &
        &        miome, omega, nits, levels, relax, nu1, nu2, mu, nu0, &
        &        direct_solve_nits, rtol, atol, errtol
 !================================================================================
 !                        1.0  Prologue
 !
 !  2D process grid
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
 !
 !  Read input filename from commmand line argument
 !
   IF( command_argument_count() > 0 ) THEN
      CALL get_command_argument(1, filein)
   END IF
   IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein)
 !
 !  Read problem inputs
 !
   OPEN(unit=99, file=filein, form='formatted')
   READ(99,in)
   CLOSE(99)
 !
   CALL mpi_dims_create(npes, ndims, dims, ierr)
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
        &               ierr)
   CALL mpi_comm_rank(comm_cart, me, ierr)
   CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
   CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
   CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
 !
   CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
   CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
 !
   info%comm              = comm_cart
   info%nu1               = nu1
   info%nu2               = nu2
   info%mu                = mu
   info%nu0               = nu0
   info%levels            = levels
   info%direct_solve_nits = direct_solve_nits
   info%relax             = relax
   info%omega             = omega
 !
   IF(me.EQ.0) THEN
      WRITE(*, in)
   END IF
   IF(nldebug) THEN
      CALL hostlist(comm_cart)
   END IF
 !================================================================================
 !                        2.0  2d Grid construction
 !
 !  Partition 2D grid
 !
   CALL timera(0, 'Grid_construction')
   npt_glob(1) = nx+1
   npt_glob(2) = ny+1
   DO i=1,ndims
      CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
      e(i) = s(i) + npt_loc(i) - 1
   END DO
   IF(nldebug) THEN
      WRITE(*,'(a,i3.3,a,2(3i8,", "))') 'PE', me, ' coords, s,e:', &
           &  (coords(i),s(i),e(i),i=1,ndims)
   END IF
 !
 !   Global mesh
 !
   dx = Lx/REAL(nx)
   dy = Ly/REAL(ny)
   ALLOCATE(xgrid(0:nx))
   ALLOCATE(ygrid(0:ny))
   xgrid = [ (i*dx, i=0,nx) ]
   ygrid = [ (i*dy, i=0,ny) ]
 !
 !   Create grid structure
 !
   ALLOCATE(grids(levels))
   npoints = 9             ! Size of FD stencil
   ALLOCATE(id(npoints,2))
   id=RESHAPE([ 0, -1,  0,  1, -1, 1, -1, 0, 1,  & 
                0, -1, -1, -1,  0, 0,  1, 1, 1], &
               [npoints,2])
   CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart)
 !
   IF(nldebug) THEN
      DO l=1,levels
         WRITE(str,'(a,i0)') 'Number of local points at level ', l
         CALL disp(TRIM(str), grids(l)%npt_loc, comm_cart)
      END DO
   END IF
   CALL mpi_reduce(grids(levels)%npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, &
        &          0, comm_cart, ierr)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2i4)') 'Minimum local npt at coarsest grid:', npt_loc_min
   END IF
 !
   CALL timera(1, 'Grid_construction')
 !================================================================================
 !                        3.0  FD Operator
 !
   CALL timera(0, 'FD Operator')
 !
   DO l=1,levels
      CALL fdmat(grids(l),  fdense, icrosst, grids(l)%fdmat)
      CALL ibc_fdmat(grids(l)%fdmat, prb)
   END DO
 !
   CALL timera(1, 'FD Operator')
 !================================================================================
 !                        4.0 RHS and exact solution at the finest grid (l=1)
 !
 !    Allocate memory
 !
   CALL timera(0, 'RHS and exact sol')
 !
   s0 = grids(1)%s0; e0 = grids(1)%e0
   s  = grids(1)%s;  e  = grids(1)%e
   g = [1,1]
   v_exact = gvector_2d(s, e, g) ! Exact solutions   
   errs    = gvector_2d(s, e, g) ! Disc. errors
   resids  = gvector_2d(s, e, g) ! Residuals 
   ALLOCATE(resid_it(0:nits))
   ALLOCATE(rresid(0:nits))
   ALLOCATE(err_it(0:nits))
 !
 !   Set RHS at the finest grid and impose Dirichlet/Neuman BC.
 !
   grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
   CALL ibc_rhs(grids(1)%f, s0, e0, prb)
 !
 !   Exact solutions
 !
   v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
 !
   CALL timera(1, 'RHS and exact sol')
 !================================================================================
 !                        5.0  MG V-cycle iteration loop
 !
 !   Norm of A and b
 !
   norma = norm_mat(grids(1)%fdmat, comm_cart)
   normb = norm_vec(grids(1)%f, comm_cart)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb
   END IF
 !
   grids(1)%v = 0.0d0
   resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
   errs  = grids(1)%v - v_exact
   err_it(0)   = norm_vec(errs, comm_cart, root=0)
   resid_it(0) = norm_vec(resids, comm_cart)
   normv = norm_vec(grids(1)%v, comm_cart)
   rresid(0) = resid_it(0)  /  ( norma*normv + normb )
 !
   IF(me.EQ.0) THEN 
      WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
           &                           'rel. resid', 'ratio'
      WRITE(*,'(i4,3(1pe12.3,8X))') 0, resid_it(0), err_it(0),  rresid(0)
   END IF
 !
   CALL timera(0, 'MG V-cycle loop')
   t_mg = 0.0d0
   DO it=1,nits
      t_mg0 = mpi_wtime()
      CALL mg(grids, info, 1)
      t_mg = t_mg + (mpi_wtime()-t_mg0)
      resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
      errs  = grids(1)%v - v_exact
      err_it(it)   = norm_vec(errs, comm_cart)
      resid_it(it) = norm_vec(resids, comm_cart)
      normv = norm_vec(grids(1)%v, comm_cart)
      rresid(it)  = resid_it(it)  /  ( norma*normv + normb )
      ratio_err   = err_it(it)/err_it(it-1)
      ratio_resid = resid_it(it)/resid_it(it-1)
      ratio_rresid= rresid(it)/ rresid(it-1)
      IF(me.EQ.0) THEN 
         WRITE(*,'(i4,3(1pe12.3,0pf8.2))') it, &
              &   resid_it(it), ratio_resid,&
              &   err_it(it),   ratio_err, &
              &   rresid(it),   ratio_rresid
      END IF
      IF(resid_it(it) .LE. atol .OR. rresid(it) .LE. rtol .OR. &
           &  ABS(ratio_err-1._rkind).LT.errtol) THEN 
         nits = it
         EXIT
      END IF
   END DO
 !
   CALL timera(1, 'MG V-cycle loop')
   CALL mpi_reduce(t_mg, t_mg_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm_cart, ierr)
   CALL mpi_reduce(t_mg, t_mg_min, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm_cart, ierr)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,2(1pe10.3)/)') 'Minmax of MG (only) time (s):', t_mg_min, t_mg_max
   END IF
 !================================================================================
 !                        9.0  Epilogue
 !
   IF(nldebug) THEN
      CALL h5file
   END IF
 !
   CALL timera(9, '')
   CALL MPI_FINALIZE(ierr)
 CONTAINS
 !
 !+++
   FUNCTION fdense(x)
 !
 !   Return density
 !
     REAL(rkind), INTENT(in) :: x(:)
     REAL(rkind)              :: fdense(SIZE(x))
     fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
   END FUNCTION fdense
 !+++
   FUNCTION fexact(x,y)
 !
 !   Return analytical solution
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind) :: fexact(SIZE(x),SIZE(y))
     REAL(rkind) :: c
     INTEGER :: j
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     END IF
   END FUNCTION fexact
 !+++
   FUNCTION frhs(x,y)
 !
 !   Return RHS
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind) :: frhs(SIZE(x),SIZE(y))
     REAL(rkind) :: c, s, d(SIZE(x))
     REAL(rkind) :: corr
     INTEGER :: j
     corr = 1.d0+icrosst**2/4.0d0
     d(:) = fdense(x(:))
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
                &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
                &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     END IF
   END FUNCTION frhs
 !+++!
   FUNCTION outerprod(x, y) RESULT(r)
 !
 !  outer product
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: r(SIZE(x),SIZE(y))
     INTEGER :: i, j
     DO j=1,SIZE(y)
        DO i=1,SIZE(x)
           r(i,j) = x(i)*y(j)
        END DO
     END DO
   END FUNCTION outerprod
 !+++
   SUBROUTINE h5file
 !
 !  Result hdf5 file
 !
     USE futils
     CHARACTER(len=128) :: file='ppoisson_fd.h5'
     INTEGER :: fid
     CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
     CALL attach(fid, '/', 'PRB', prb)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NY', ny)
     CALL attach(fid, '/', 'KX', kx)
     CALL attach(fid, '/', 'KY', ky)
     CALL attach(fid, '/', 'LX', Lx)
     CALL attach(fid, '/', 'LY', Ly)
     CALL attach(fid, '/', 'ICROSST', icrosst)
     CALL attach(fid, '/', 'BETA', beta)
     CALL attach(fid, '/', 'MIOME', miome)
     CALL attach(fid, '/', 'OMEGA', omega)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'DIRECT_SOLVE_NITS', direct_solve_nits)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'NU1', nu1)
     CALL attach(fid, '/', 'NU2', nu2)
     CALL attach(fid, '/', 'NU0', nu0)
     CALL attach(fid, '/', 'MU', mu)
 !
     CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
     CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
 !
     CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g)
     CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g)
     CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g)
     CALL putarrnd(fid, '/errs',   errs%val,  (/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/))
 !
     CALL putarr(fid, '/resid', resid_it(0:nits), ionode=0)
     CALL putarr(fid, '/error', err_it(0:nits), ionode=0)
 !
     CALL putmat(fid, '/MAT', grids(1)%fdmat)
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM main
diff --git a/multigrid/src/stencil_mod.f90 b/multigrid/src/stencil_mod.f90
index 04095c5..cd2618a 100644
--- a/multigrid/src/stencil_mod.f90
+++ b/multigrid/src/stencil_mod.f90
@@ -1,243 +1,243 @@
 !>
 !> @file stencil_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE stencil
 !
 !   stencil_2d: Implement 2D stencil for matrix-less operations
 !
 !   T.M. Tran, CRPP-EPFL
 !   August 2013
 !
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
 !
   PRIVATE
   PUBLIC :: stencil_2d, init, vmx, putmat, laplacian, &
        &    OPERATOR(*)
 !
   TYPE stencil_2d
      LOGICAL                  :: nluni
      INTEGER, DIMENSION(2)    :: ldim, gdim
      INTEGER, DIMENSION(2)    :: s0, e0, s, e
      INTEGER                  :: npoints
      INTEGER, ALLOCATABLE     :: id(:,:)
      REAL(rkind), ALLOCATABLE :: val(:,:,:)
   END TYPE stencil_2d
 !
   INTERFACE init
      MODULE PROCEDURE init_stencil_2d
   END INTERFACE init
   INTERFACE vmx
      MODULE PROCEDURE vmx_stencila_2d
      MODULE PROCEDURE vmx_stencilg_2d
   END INTERFACE vmx
   INTERFACE putmat
      module procedure putmat_stencil
   END INTERFACE putmat
 !
   INTERFACE OPERATOR(*)
      MODULE PROCEDURE vmx_stencila_2d
      MODULE PROCEDURE vmx_stencilg_2d
   END INTERFACE OPERATOR(*)
 !
 CONTAINS
 !================================================================================
   SUBROUTINE init_stencil_2d(s, e, id, nluni, mat, comm)
 !
 !  stencil_2d constructor
 !
     USE mpi
     INTEGER, INTENT(in)           :: s(2), e(2) ! Bounds in each dim.
     INTEGER, INTENT(in)           :: id(:,:)    ! Structure of stencil
     LOGICAL, INTENT(in)           :: nluni      ! Uniform stencil
     TYPE(stencil_2d), INTENT(out) :: mat
     INTEGER, INTENT(in)           :: comm
     INTEGER :: me, ndim=2, ierr
     INTEGER :: npoints    ! Size of the stencil
 !
     CALL mpi_comm_rank(comm, me, ierr)
 !
     IF(id(1,1).NE.0 .AND. id(1,2).NE.0) THEN
        IF(me.EQ.0) THEN
           WRITE(*,*) 'INIT_STENCIL: id(1,:) should be (0,0)!'
           CALL mpi_abort(comm, -1, ierr)
        END IF
     END IF
 !
     npoints = SIZE(id,1)
     mat%npoints = npoints
     mat%s = s
     mat%e = e
     mat%nluni = nluni
     IF(nluni) THEN
        ALLOCATE(mat%val(1,1, 0:npoints-1))
     ELSE
        ALLOCATE(mat%val(s(1):e(1), s(2):e(2), 0:npoints-1))
     END IF
     ALLOCATE(mat%id(0:npoints-1, ndim))
     mat%id(:,:) = id(:,:)
     mat%val(:,:,:) = 0.0
 !
     mat%ldim = e-s+1
     CALL mpi_allreduce(mat%s, mat%s0, ndim, MPI_INTEGER, MPI_MIN, comm, ierr)
     CALL mpi_allreduce(mat%e, mat%e0, ndim, MPI_INTEGER, MPI_MAX, comm, ierr)
     mat%gdim = mat%e0 - mat%s0 + 1
 !
   END SUBROUTINE init_stencil_2d
 !================================================================================
   FUNCTION vmx_stencila_2d(mat, xarr) RESULT(res)
 !
 !   Return product res = mat*x, where x and res are simple arrays
 !
     TYPE(stencil_2d), INTENT(in)         :: mat
     REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:)
     REAL(rkind)           :: res(LBOUND(xarr,1):UBOUND(xarr,1), &
          &                   LBOUND(xarr,2):UBOUND(xarr,2))
     INTEGER               :: k, i, j
     INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub
 !
     smin(:) = mat%s0(:)
     emax(:) = mat%e0(:)
     res = 0.0
     DO k=0,mat%npoints-1
        d(:) = mat%id(k,:)
        lb = MAX(smin, smin-d, mat%s)
        ub = MIN(emax, emax-d, mat%e)
        DO j=lb(2),ub(2)
           DO i=lb(1),ub(1)
              res(i,j) = res(i,j) + mat%val(i,j,k)*xarr(i+d(1),j+d(2))
           END DO
        END DO
     END DO
   END FUNCTION vmx_stencila_2d
 !================================================================================
   FUNCTION vmx_stencilg_2d(mat, xarr) RESULT(res)
 !
 !   Return product res= mat*x, where x and res are gvectors
 !
     USE gvector, ONLY : gvector_2d
     TYPE(stencil_2d), INTENT(in) :: mat
     TYPE(gvector_2d), INTENT(in) :: xarr
     TYPE(gvector_2d)             :: res
     INTEGER               :: k, i, j
     INTEGER, DIMENSION(2) :: d, s, e
 !
     s = xarr%s
     e = xarr%e
     res = gvector_2d(xarr%s, xarr%e, xarr%g)
 !
 ! Diagonal contributions: d(0) = (0,0)
     DO j=s(2),e(2)
        DO i=s(1),e(1)
           res%val(i,j) = mat%val(i,j,0)*xarr%val(i,j)
        END DO
     END DO
 !
     DO k=1,mat%npoints-1
        d(:) = mat%id(k,:)
        DO j=s(2),e(2)
           DO i=s(1),e(1)
              res%val(i,j) = res%val(i,j) + mat%val(i,j,k)*xarr%val(i+d(1),j+d(2))
           END DO
        END DO
     END DO
   END FUNCTION vmx_stencilg_2d
 !================================================================================
   SUBROUTINE putmat_stencil(fid, label, mat, str)
     USE futils
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(stencil_2d), INTENT(in)           :: mat
     CHARACTER(len=*), INTENT(in), OPTIONAL :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
 !
     CALL putarr(fid, TRIM(label)//'/dists', mat%id, ionode=0)
     CALL putarrnd(fid, TRIM(label)//'/val',   mat%val, (/1,2/))
   END SUBROUTINE putmat_stencil
 !=======================================================================
   SUBROUTINE laplacian(dx, dy, mat)
 !
 !   Construct a Laplacian using 5-point FD discretization
 !   Assume homegeneous Dirichlet BC on all 4 faces.
 !
     REAL(rkind), INTENT(in)         :: dx, dy
     TYPE(stencil_2d), INTENT(inout) :: mat
 !
     INTEGER               :: i, j, k
     INTEGER               :: ieast, iwest, jsouth, jnorth
     INTEGER, DIMENSION(2) :: d
     REAL(rkind)           :: dx2inv, dy2inv
 !
 !   Assemble the stencil
 !
     dx2inv = 1.0d0/dx**2
     dy2inv = 1.0d0/dy**2
 !
     mat%val(:,:,0) = -2.0d0*(dx2inv+dy2inv) ! Diagonal
 !
     DO k=1,mat%npoints-1  ! Off diagonal
        d = mat%id(k,:)
        DO j=mat%s(2),mat%e(2)
           DO i=mat%s(1),mat%e(1)
              IF(d(1).EQ.0) THEN            ! north and south
                 mat%val(i,j,k) = dy2inv
              ELSE IF(d(2).EQ.0) THEN       ! east and west
                 mat%val(i,j,k) = dx2inv
              END IF
           END DO
        END DO
     END DO
 !
 !   Impose Dirichlet BC on all 4 boundaries
 !
     ieast = mat%s0(1)
     IF(ieast .EQ. mat%s(1)) THEN   ! East boundary
        mat%val(ieast, mat%s(2):mat%e(2), :) = 0.0
        mat%val(ieast, mat%s(2):mat%e(2), 0) = 1.0
     END IF
     iwest = mat%e0(1)
     IF(iwest .EQ. mat%e(1)) THEN   ! West boundary
        mat%val(iwest, mat%s(2):mat%e(2), :) = 0.0
        mat%val(iwest, mat%s(2):mat%e(2), 0) = 1.0
     END IF
     jsouth = mat%s0(2)
     IF(jsouth .EQ. mat%s(2)) THEN  ! South boundary
        mat%val(mat%s(1):mat%e(1), jsouth, :) = 0.0
        mat%val(mat%s(1):mat%e(1), jsouth, 0) = 1.0
     END IF
     jnorth = mat%e0(2)
     IF(jnorth .EQ. mat%e(2)) THEN  ! North boundary
        mat%val(mat%s(1):mat%e(1), jnorth, :) = 0.0
        mat%val(mat%s(1):mat%e(1), jnorth, 0) = 1.0
     END IF
 !
   END SUBROUTINE laplacian
 !================================================================================
 END MODULE stencil
diff --git a/multigrid/src/test_csr.f90 b/multigrid/src/test_csr.f90
index 02172b2..bd6b2e1 100644
--- a/multigrid/src/test_csr.f90
+++ b/multigrid/src/test_csr.f90
@@ -1,145 +1,145 @@
 !>
 !> @file test_csr.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test routines of module csr_mod
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
 !
   INTEGER :: nx=8, nidbas=1, alpha=0, modem=10
   DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0
   LOGICAL :: nlper=.FALSE.
   INTEGER :: ngauss, nterms
   INTEGER :: i, j
 !
   TYPE(grid1d)  :: gridx(1)
   TYPE(csr_mat) :: mata
 !
   DOUBLE PRECISION, ALLOCATABLE :: arow(:), sum_row(:)
   DOUBLE PRECISION, ALLOCATABLE :: acol(:), sum_col(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol(:), rhs(:)
 !
   NAMELIST /newrun/ nx, nidbas, sigma, kmode, modem, alpha, nlper
 !----------------------------------------------------------------------------
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Set grid
 !
   ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0)
   CALL create_grid(nx, nidbas, ngauss, alpha, gridx, nlper)
 !
 !   Create FE matrice and set BC u(0)=u(1)=0
 !
   nterms = 3
   CALL femat(gridx(1)%spl, mata, coefeq, nterms)
   CALL to_mat(mata)
   WRITE(*,'(/a,2i6)') 'rank, nnz', mata%rank, mata%nnz
   WRITE(*,'(a/(12(1pe12.3)))')   'diag', mata%val(mata%idiag)
   ALLOCATE(arow(mata%rank))
   ALLOCATE(acol(mata%rank))
   ALLOCATE(sum_row(mata%rank))
   ALLOCATE(sum_col(mata%rank))
   sum_col = 0.0d0
   DO i=1,mata%rank
      CALL getrow(mata, i, arow)
      sum_row(i) = SUM(arow)
      sum_col = sum_col+arow
      IF(i.EQ.1) WRITE(*,'(/a)') 'Matrix A'
      WRITE(*,'(12(1pe12.3))')  arow
   END DO
   WRITE(*,'(a/(12(1pe12.3)))')   'sum of row', sum_row
   WRITE(*,'(a/(12(1pe12.3)))')   'sum of col', sum_col
   DO j=1,mata%rank
      CALL getcol(mata, j, acol)
      sum_col(j) = SUM(acol)
   END DO
   WRITE(*,'(a/(12(1pe12.3)))')   'sum of col', sum_col
 !
 !   Clear and rebuild matrix
 !
   WRITE(*,'(/a)') 'Clear and rebuild matrix ...'
   CALL clear_mat(mata)
   CALL femat(gridx(1)%spl, mata, coefeq, nterms)
   WRITE(*,'(a,2i6)') 'rank, nnz', mata%rank, mata%nnz
   DO i=1,mata%rank
      CALL getrow(mata, i, arow)
      WRITE(*,'(12(1pe12.3))')  arow
   END DO
   WRITE(*,'(a/(12(1pe12.3)))')   'diag', mata%val(mata%idiag)
 !
 !   Test VMX
 !
   ALLOCATE(sol(mata%rank))
   ALLOCATE(rhs(mata%rank))
   sol = 1.0d0
 !
   rhs = vmx(mata, sol)
   acol = rhs-sum_row
   WRITE(*,'(/a)') 'Test VMX ...'
   WRITE(*,'(a/(12(1pe12.3)))') 'amat*sol', rhs  
   WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol))
 !
   rhs = vmx(mata, sol, 'T')
   acol = rhs-sum_col
   WRITE(*,'(a/(12(1pe12.3)))') "amat'*sol", rhs
   WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol))
 !
   CALL destroy(mata)
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     SELECT CASE (alpha)
     CASE(0)           ! Cartesian geometry
        c(1) = 1.0d0
        idt(1) = 1
        idw(1) = 1
        c(2) = sigma
        idt(2) = 0
        idw(2) = 0
        c(3) = 1.0d0
        idt(3) = 1
        idw(3) = 0
     CASE(1)
        c(1) = x
        idt(1) = 1
        idw(1) = 1
        c(2) = modem**2/x
        idt(2) = 0
        idw(2) = 0
        c(3) = 1.0d0
        idt(3) = 1
        idw(3) = 0
     CASE default
        WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!'
     END SELECT
   END SUBROUTINE coefeq
 !----------------------------------------------------------------------------
 END PROGRAM main
diff --git a/multigrid/src/test_gvec1d.f90 b/multigrid/src/test_gvec1d.f90
index 1dd22f5..d67e45a 100644
--- a/multigrid/src/test_gvec1d.f90
+++ b/multigrid/src/test_gvec1d.f90
@@ -1,190 +1,190 @@
 !>
 !> @file test_gvec1d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !    Test implementation of 1D vectors with arbirary
 !    vector bounds and ghost cell width.
 !
 !  T.M. Tran (09/2013)
 !
 MODULE gvector
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
   PRIVATE
   PUBLIC :: rkind, gvector_1d, disp, norm2, &
        &    OPERATOR(+), OPERATOR(-), OPERATOR(*), &
        &    ASSIGNMENT(=)
 
   TYPE gvector_1d
      INTEGER :: s, e, g
      REAL(rkind), ALLOCATABLE :: val(:)
   END TYPE gvector_1d
 
   INTERFACE gvector_1d
      MODULE PROCEDURE constructor
   END INTERFACE gvector_1d
   INTERFACE OPERATOR(+)
      MODULE PROCEDURE add_scal
      MODULE PROCEDURE add_vec
   END INTERFACE OPERATOR(+)
   INTERFACE OPERATOR(-)
      MODULE PROCEDURE minus_vec
      MODULE PROCEDURE substract_vec
   END INTERFACE OPERATOR(-)
   INTERFACE OPERATOR(*)
      MODULE PROCEDURE scale_left
      MODULE PROCEDURE scale_right
   END INTERFACE OPERATOR(*)
   INTERFACE ASSIGNMENT(=)
      MODULE PROCEDURE from_scal
      MODULE PROCEDURE from_vec
   END INTERFACE ASSIGNMENT(=)
   INTERFACE norm2
      module procedure norm2_gvector_1d
   END INTERFACE norm2
 
 CONTAINS
   FUNCTION constructor(s, e, g) RESULT(res)
     INTEGER, INTENT(in)           :: s, e
     INTEGER, OPTIONAL, INTENT(in) :: g
     TYPE(gvector_1d)              :: res
     INTEGER :: lb, ub
     res%g= 0
     IF(PRESENT(g)) res%g=g
     res%s=s
     res%e=e
     lb = res%s-res%g
     ub = res%e+res%g
     ALLOCATE(res%val(lb:ub))
     res%val = -9999.0
   END FUNCTION constructor
 
   FUNCTION add_vec(lhs, rhs) RESULT(res)
     TYPE(gvector_1d), INTENT(in) :: lhs, rhs
     TYPE(gvector_1d)             :: res
     res = gvector_1d(lhs%s, lhs%e, lhs%g)
     res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs%val(res%s:res%e)
   END FUNCTION add_vec
 
   FUNCTION add_scal(lhs, rhs) RESULT(res)
     TYPE(gvector_1d), INTENT(in) :: lhs
     REAL(rkind), INTENT(in)      :: rhs
     TYPE(gvector_1d)             :: res
     res = gvector_1d(lhs%s, lhs%e, lhs%g)
     res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs
   END FUNCTION add_scal
 
   FUNCTION minus_vec(this) RESULT(res)
     TYPE(gvector_1d), INTENT(in) :: this
     TYPE(gvector_1d)             :: res
     res = gvector_1d(this%s, this%e, this%g)
     res%val(res%s:res%e) = -this%val(res%s:res%e)
   END FUNCTION minus_vec
 
   FUNCTION substract_vec(lhs, rhs) RESULT(res)
     TYPE(gvector_1d), INTENT(in) :: lhs, rhs
     TYPE(gvector_1d)             :: res
     res = gvector_1d(lhs%s, lhs%e, lhs%g)
     res = lhs + (-rhs)
   END FUNCTION substract_vec
 
   FUNCTION scale_left(lhs, rhs) RESULT(res)
     REAL(rkind), INTENT(in)      :: lhs
     TYPE(gvector_1d), INTENT(in) :: rhs
     TYPE(gvector_1d)             :: res
     res = gvector_1d(rhs%s, rhs%e, rhs%g)
     res%val(res%s:res%e) = lhs * rhs%val(res%s:res%e)
   END FUNCTION scale_left
 
   FUNCTION scale_right(lhs, rhs) RESULT(res)
     TYPE(gvector_1d), INTENT(in) :: lhs
     REAL(rkind), INTENT(in)      :: rhs
     TYPE(gvector_1d)             :: res
     res = gvector_1d(lhs%s, lhs%e, lhs%g)
     res%val(res%s:res%e) = rhs * lhs%val(res%s:res%e)
   END FUNCTION scale_right
 
   SUBROUTINE from_vec(lhs, rhs)
     TYPE(gvector_1d), INTENT(inout) :: lhs
     REAL(rkind), INTENT(in)         :: rhs(:)
     INTEGER :: n
     n = lhs%e - lhs%s + 1
     IF(SIZE(rhs) .NE. n) THEN
        PRINT*, 'from_vec: sizes of rhs and lhs not equal!'
        STOP
     END IF
     lhs%val(lhs%s:lhs%e) = rhs(1:n)
   END SUBROUTINE from_vec
 
   SUBROUTINE from_scal(lhs, rhs)
     TYPE(gvector_1d), INTENT(inout) :: lhs
     REAL(rkind), INTENT(in)         :: rhs
     lhs%val(lhs%s:lhs%e) = rhs
   END SUBROUTINE from_scal
 
   SUBROUTINE disp(str,this)
     CHARACTER(len=*), INTENT(in) :: str
     TYPE(gvector_1d), INTENT(in) :: this
     WRITE(*,'(/a,3i6)') str//': s, e, g =', this%s, this%e, this%g
     WRITE(*,'(10(1pe12.3))') this%val
   END SUBROUTINE disp
 
   FUNCTION norm2_gvector_1d(this) RESULT(res)
     TYPE(gvector_1d), INTENT(in) :: this
     REAL(rkind)                  :: res
     res = NORM2(this%val(this%s:this%e))
   END FUNCTION norm2_gvector_1d
 END MODULE gvector
 
 PROGRAM main
   USE gvector
   IMPLICIT NONE
   INTEGER :: s=0, e=5, g=1
   INTEGER :: i, lb, ub
   REAL(rkind) :: a=0.1
   TYPE(gvector_1d) :: v1, v2, v3
 !
   lb = s-g
   ub = e+g
   v1 = gvector_1d(s, e, g)
   v1%val(s:e) = [ (i, i=s,e) ]
   CALL disp('v1', v1)
 !
   v2 = v1 + a*v1
   CALL disp('v1+a*v1', v2)
 !
   v3 = v1 - v1*a
   CALL disp('v1-v1*a', v3)
 !
   WRITE(*,'(a,1pe12.3)') 'norm of v1      =', NORM2(v1)
   WRITE(*,'(a,1pe12.3)') 'norm of v1-a*v1 =', NORM2(v1-a*v1)
 !
   v1 = 0.0d0
   CALL disp('Should be all zero', v1)
   v2 = [ 1.d0, 2.d0, 3.d0, 4.d0, 5.d0, 6.d0 ]
   CALL disp('Should be (1. 2. 3. 4. 5. 6.)', v2)
 
 END PROGRAM main
 
diff --git a/multigrid/src/test_intergrid0.f90 b/multigrid/src/test_intergrid0.f90
index 7a76d32..f08d611 100644
--- a/multigrid/src/test_intergrid0.f90
+++ b/multigrid/src/test_intergrid0.f90
@@ -1,230 +1,230 @@
 !>
 !> @file test_intergrid0.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   intergrid transfer using *serial* multigrid module:
 !     - restriction of rhs
 !     - prolongation of sol
 !
   USE multigrid, ONLY : grid2d, mg_info, &
        &                get_lmax, create_grid_fd, ibc_transf, &
        &                prolong, restrict
   IMPLICIT NONE
   DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0)
   DOUBLE PRECISION :: Lx, Ly, kx, ky, icrosst, beta, miome
   INTEGER :: nx, ny, levels
   CHARACTER(len=4) :: prb
   LOGICAL :: nldebug
 !
   DOUBLE PRECISION :: dx, dy
   DOUBLE PRECISION, ALLOCATABLE :: x(:),y(:)
 !
   TYPE(mg_info) :: info  ! info for MG
   TYPE(grid2d), ALLOCATABLE :: grids(:)
 !
   INTEGER :: i, l
 !
   NAMELIST /parameters/ prb, nx, ny, levels, Lx, Ly, kx, ky, icrosst, beta, &
        &                miome, nldebug
 !--------------------------------------------------------------------------------
 !
 !   Default inputs
 !
   nx=32
   ny=32
   levels = 2
   kx=1
   ky=1
   icrosst=1.0d0
   Lx = 1.0D0
   Ly = 1.0D0
   miome = 200d0
   beta = 0d0
   prb = 'dddd'
   nldebug = .FALSE.
 !
   READ(*,parameters)
   WRITE(*,parameters)
 !
 !   Fine grid
 !
   dx = lx/REAL(nx,8)
   dy = ly/REAL(ny,8)
   ALLOCATE(x(0:nx), y(0:ny))
   x = dx * [(i,i=0,nx)]
   y = dy * [(i,i=0,ny)]
   WRITE(*,'(a/10(1pe12.3))') 'x =', x
   WRITE(*,'(a/10(1pe12.3))') 'y =', y
 !
 !   Create array of grids
 !
   levels = MIN(levels, get_lmax(nx), get_lmax(ny))
   WRITE(*,'(a,i4)') 'Number of levels', levels
   ALLOCATE(grids(levels))
   info%nu1 = 1
   info%nu2 = 1
   info%mu = 1
   info%nu0 = 1
   info%levels = levels
   info%relax = 'jac'
   info%omega = 1
   CALL create_grid_fd(x, y, grids, info, mat_type='cds', debug=nldebug)
 !
 !   Set BC on grid transfer matrices
 !
   IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X
   CALL ibc_transf(grids,2,3)                   ! Direction Y
 !
 !   Define RHS at l=1, compute RHS at l=2,...,levels by "restriction".
 !
   grids(1)%f(:,:) = frhs(grids(1)%x,grids(1)%y)
   DO l=2,levels
      grids(l)%f = restrict(grids(l)%matp, grids(l-1)%f)
      grids(l)%f = 0.25d0*grids(l)%f  ! Scaling for FD
   END DO
 !
 !   Define SOL at l=levels, compute SOL at l=levels-1,..,1 by "prolongation"
 !
   grids(levels)%v(:,:) = fsol(grids(levels)%x,grids(levels)%y)
   DO l=levels-1,1,-1
      grids(l)%v = prolong(grids(l+1)%matp, grids(l+1)%v)
   END DO
 !
   IF(nldebug) THEN
      DO l=1,levels
         WRITE(*,'(a,i3)') '==== Level', l
         WRITE(*,'(a)') 'f ='
         DO i=0,grids(l)%n(1)
            WRITE(*,'(10f8.3)') grids(l)%f(i,:)
         END DO
         WRITE(*,'(a)') 'v ='
         DO i=0,grids(l)%n(1)
            WRITE(*,'(10f8.3)') grids(l)%v(i,:)
         END DO
      END DO
   END IF
 !
 !   Epilogue
 !
   CALL h5file
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION fdense(x)
 !
 !   Return density
 !
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: fdense(SIZE(x))
     fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
   END FUNCTION fdense
 !+++
   FUNCTION frhs(x,y)
 !
 !   Return RHS
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c, s, d(SIZE(x))
     DOUBLE PRECISION :: corr
     INTEGER :: j
     corr = 1.d0+icrosst**2/4.0d0
     d(:) = fdense(x(:))
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
                &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
                &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     END IF
   END FUNCTION frhs
 !+++
   FUNCTION fsol(x,y)
 !
 !   Return analytical solution
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c
     INTEGER :: j
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     END IF
   END FUNCTION fsol
 !+++
   SUBROUTINE h5file
     USE futils
     USE csr, ONLY : putmat
     CHARACTER(len=128) :: file='test_intergrid0.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NY', ny)
     CALL attach(fid, '/', 'KX', kx)
     CALL attach(fid, '/', 'KY', ky)
     CALL attach(fid, '/', 'LX', Lx)
     CALL attach(fid, '/', 'LY', Ly)
     CALL attach(fid, '/', 'BETA', beta)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'PRB', prb)
     CALL attach(fid, '/', 'NLDEBUG', nldebug)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
     END DO
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM main
diff --git a/multigrid/src/test_intergrid1.f90 b/multigrid/src/test_intergrid1.f90
index 0d0a23b..514ad2f 100644
--- a/multigrid/src/test_intergrid1.f90
+++ b/multigrid/src/test_intergrid1.f90
@@ -1,240 +1,240 @@
 !>
 !> @file test_intergrid1.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test implementation of (parallel) matrix-free
 !
   USE iso_fortran_env, ONLY : rkind => real64
   USE parmg, ONLY    : grid2_type, init_restrict, coarse, get_lmax, &
        &               exchange, prolong, restrict, disp, norm_vec
   USE pputils2, ONLY : dist1d
   USE gvector, ONLY  : gvector_2d,OPERATOR(-)
   USE futils
   USE mpi
   IMPLICIT NONE
 !
   INTEGER, PARAMETER :: ndims=2
   INTEGER                   :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims=[0,0]
   INTEGER, DIMENSION(ndims) :: lmax, coords, comm1d
   LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
   LOGICAL                   :: reorder =.FALSE.
   INTEGER                   :: comm_cart, comm_futils
 !
   INTEGER           :: fin
   CHARACTER(len=64) :: filein = 'test_intergrid0.h5'
   CHARACTER(len=64) :: dsname
   CHARACTER(len=4)  :: prb
   LOGICAL           :: nldebug
 !
   INTEGER                       :: nx, ny, levels
   TYPE(grid2_type), ALLOCATABLE :: grids(:), new_grids(:)
   INTEGER, DIMENSION(ndims)     :: e, s, npt_glob, npt_loc, npt_loc_min
 !
   CHARACTER(len=64) :: str
   REAL(rkind)       :: err
   INTEGER           :: i, k, l
 !--------------------------------------------------------------------------------
 !                              1.0 Prologue
 !
 !   Init MPI and setup 2D grid topology
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_dims_create(npes, ndims, dims, ierr)
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
        &               ierr)
   CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
   CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
   CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
 !
   IF( me .EQ. 0 ) WRITE(*,'(a,i3,i3)') '2d processor grid', dims
 !
 !   Get nx, ny, levels from h5 file created by test_intergrid0
 !
   IF( command_argument_count() > 0 ) THEN
      CALL get_command_argument(1, filein)
   END IF
   IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein)
 !
   CALL mpi_comm_dup(comm_cart, comm_futils, ierr)
   CALL openf(filein, fin, mpicomm=comm_futils)
   CALL getatt(fin, '/', 'NX', nx, ierr)
   CALL getatt(fin, '/', 'NY', ny, ierr)
   CALL getatt(fin, '/', 'LEVELS', levels, ierr)
   CALL getatt(fin, '/', 'PRB', prb, ierr)
   CALL getatt(fin, '/', 'NLDEBUG', nldebug, ierr)
   IF(me.EQ.0) WRITE(*,'(a,a,3i5,l3)') 'prb, nx, ny, levels: ', prb, nx, ny, &
        &                              levels, nldebug
 !--------------------------------------------------------------------------------
 !                              2.0 Read (f,v) from h5 file
 !
   ALLOCATE(grids(levels))
 !
 !    Partition on finest grid
 !
   npt_glob(1) = nx+1
   npt_glob(2) = ny+1
   DO i=1,ndims
      CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
      e(i) = s(i) + npt_loc(i) - 1
      lmax(i) = get_lmax(s(i), npt_loc(i), 1, comm1d(i))
   END DO
   npt_loc = e-s+1
   IF(me.EQ.0) WRITE(*,'(a,2i4)') 'lmax', lmax
 !
 !    Partition on coaser grids
 !
   DO l=1,levels
      IF(l.GT.1) THEN
         CALL coarse(s,e)
         npt_loc = e-s+1
         CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, &
              &             MPI_MIN, comm_cart, ierr)
         CALL mpi_allreduce(e, npt_glob, 2, MPI_INTEGER, MPI_MAX, &
              &             comm_cart, ierr)
         npt_glob = npt_glob+1
      END IF
      WRITE(str,'(a,i3,a)') 'Partition at level', l, ': start. index ='
      CALL disp(TRIM(str), s, comm_cart)
      IF(me.EQ.0) THEN 
         WRITE(*,'(a,2i6)') 'npt_glob   ', npt_glob
         WRITE(*,'(a,2i6)') 'npt_loc_min', npt_loc_min
      END IF
      grids(l)%s = s
      grids(l)%e = e
      grids(l)%npt = npt_glob
      grids(l)%f = gvector_2d(s, e, [1,1])
      grids(l)%v = gvector_2d(s, e, [1,1])
      ALLOCATE(grids(l)%x(0:npt_glob(1)-1))  ! Global coords (x,y)
      ALLOCATE(grids(l)%y(0:npt_glob(2)-1))
      WRITE(dsname,'("/mglevels/level.",i2.2)') l
      CALL getarr(fin, TRIM(dsname)//"/x", grids(l)%x)
      CALL getarr(fin, TRIM(dsname)//"/y", grids(l)%y)
      CALL getarrnd(fin, TRIM(dsname)//"/v", grids(l)%v%val, [1,2], garea=[1,1])
      CALL getarrnd(fin, TRIM(dsname)//"/f", grids(l)%f%val, [1,2], garea=[1,1])
   END DO
 !--------------------------------------------------------------------------------
 !                              3.0 Parallel intergrid transfer
 !
   ALLOCATE(new_grids(levels))
   CALL copy_grids(grids, new_grids)
 !
 !   Set up restriction stencil
 !
   DO l=2,levels
      CALL init_restrict(new_grids(l), prb, comm_cart)
   END DO
 !
 !   Prolongation of v
 !
   DO l=levels-1,1,-1
      CALL exchange(comm_cart, grids(l+1)%v)
      CALL prolong(grids(l+1)%v, new_grids(l)%v)
      IF(nldebug) THEN
         IF(me.EQ.0) WRITE(*,'(a)') '====='
         DO k=0,npes
            IF(me.EQ.k) THEN
               s = grids(l+1)%f%s
               e = grids(l+1)%f%e
               WRITE(*,'(a,i2)') 'reference vbar on proc.', me
               DO i=s(1),e(1)
                  WRITE(*,'(10f8.3)') grids(l+1)%v%val(i,s(2):e(2))
               END DO
               s = grids(l)%f%s
               e = grids(l)%f%e
               WRITE(*,'(a,i2)') 'reference v on proc.', me
               DO i=s(1),e(1)
                  WRITE(*,'(10f8.3)') grids(l)%v%val(i,s(2):e(2))
               END DO
               WRITE(*,'(a,i2)') 'compute v on proc.', me
               DO i=s(1),e(1)
                  WRITE(*,'(10f8.3)') new_grids(l)%v%val(i,s(2):e(2))
               END DO
            END IF
            CALL mpi_barrier(comm_cart, ierr)
         END DO
      END IF
      err = norm_vec(new_grids(l)%v-grids(l)%v, comm_cart, 0)
      IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of prolongation: ', l, err
   END DO
 !
 !   Restriction of f
 !
   DO l=2,levels
      CALL exchange(comm_cart, grids(l-1)%f)
      CALL restrict(new_grids(l)%restrict_mat, grids(l-1)%f, new_grids(l)%f)
      IF(nldebug) THEN
         IF(me.EQ.0)  WRITE(*,'(a)') '====='
         DO k=0,npes
            IF(me.EQ.k) THEN
               s = grids(l-1)%f%s
               e = grids(l-1)%f%e
               WRITE(*,'(a,i2)') 'reference f on proc.', me
               DO i=s(1),e(1)
                  WRITE(*,'(10f8.3)') grids(l-1)%f%val(i,s(2):e(2))
               END DO
               s = grids(l)%f%s
               e = grids(l)%f%e
               WRITE(*,'(a,i2)') 'reference fbar on proc.', me
               DO i=s(1),e(1)
                  WRITE(*,'(10f8.3)') grids(l)%f%val(i,s(2):e(2))
               END DO
               WRITE(*,'(a,i2)') 'compute fbar on proc.', me
               DO i=s(1),e(1)
                  WRITE(*,'(10f8.3)') new_grids(l)%f%val(i,s(2):e(2))
               END DO
            END IF
            CALL mpi_barrier(comm_cart, ierr)
         END DO
      END IF
      err = norm_vec(new_grids(l)%f-grids(l)%f, comm_cart, 0)
      IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of restriction: ', l, err
   END DO
 !--------------------------------------------------------------------------------
 !                              9.0 Epilogue
 !
   CALL closef(fin)  
   CALL mpi_finalize(ierr)
 !
 CONTAINS
   SUBROUTINE copy_grids(g1, g2)
     TYPE(grid2_type), INTENT(in)  :: g1(:)
     TYPE(grid2_type), INTENT(out) :: g2(:)
     INTEGER :: l
     DO l=1,SIZE(g1)
        g2(l)%s = g1(l)%s
        g2(l)%e = g1(l)%e
        g2(l)%npt_loc = g1(l)%npt_loc
        g2(l)%npt = g1(l)%npt
        ALLOCATE(g2(l)%x(0:g2(l)%npt(1)-1)); g2(l)%x = g1(l)%x
        ALLOCATE(g2(l)%y(0:g2(l)%npt(2)-1)); g2(l)%y = g1(l)%y
        g2(l)%v = gvector_2d(g1(l)%v%s, g1(l)%v%e, g1(l)%v%g); g2(l)%v%val = g1(l)%f%val
        g2(l)%f = gvector_2d(g1(l)%f%s, g1(l)%f%e, g1(l)%f%g); g2(l)%f%val = g1(l)%f%val
     END DO
   END SUBROUTINE copy_grids
 END PROGRAM main
diff --git a/multigrid/src/test_jacobi.f90 b/multigrid/src/test_jacobi.f90
index f914535..da791e5 100644
--- a/multigrid/src/test_jacobi.f90
+++ b/multigrid/src/test_jacobi.f90
@@ -1,254 +1,254 @@
 !>
 !> @file test_jacobi.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !    Test 2D  parallel Jacobi using STENCIL_2D matrix-free structure.
 !
 MODULE mod
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
 !
   LOGICAL, PARAMETER     :: nldebug=.FALSE.
   REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
 CONTAINS
 END MODULE mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 PROGRAM main
   USE mpi
   USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2, timera, hostlist
   USE parmg, ONLY    : jacobi, get_resids
   USE stencil, ONLY  : stencil_2d, init, laplacian, putmat
   USE mod
   IMPLICIT NONE
 !
   INTEGER, PARAMETER :: ndims=2
 !
   INTEGER                   :: me, neighs(4), npes, ierr
   INTEGER, DIMENSION(ndims) :: dims=[0,0]
   INTEGER, DIMENSION(ndims) :: coords, comm1d
   LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
   LOGICAL                   :: reorder =.FALSE.
   INTEGER                   :: comm_cart
 !
   INTEGER                   :: nx=4, ny=4    ! Number of intervals
   INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc
 !
   REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
   REAL(rkind)               :: dx, dy
   INTEGER, DIMENSION(5,2)   :: id     ! 5-point stencil 
   INTEGER                   :: npoints
   TYPE(stencil_2d)          :: mat
   INTEGER                   :: i
 !
   REAL(rkind), ALLOCATABLE  :: f(:,:), v(:,:), u(:,:)
   REAL(rkind), ALLOCATABLE  :: resids(:,:), errs(:,:)
   REAL(rkind), ALLOCATABLE  :: resid_it(:), err_it(:)
   REAL(rkind)               :: omega=1.0d0, resid
   INTEGER                   :: it, it_skip, nits=100
 !
   NAMELIST /in/ nx, ny, omega, nits
 !================================================================================
 !                        1.0  Prologue
 !
 !  2D process grid
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_dims_create(npes, ndims, dims, ierr)
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
        &               ierr)
 !
   CALL mpi_comm_rank(comm_cart, me, ierr)
   CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
   CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
   CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
 !
   CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
   CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
 !
   CALL hostlist(comm_cart)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2)
   END IF
 !
 !  Read problem inputs
   IF(me.EQ.0) THEN
      READ(*,in)
      WRITE(*,in)
   END IF
 !
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
 !================================================================================
 !                        2.0  2d Grid construction
 !
 !  Partition 2D grid
   CALL timera(0, 'Grid_construction')
   npt_glob(1) = nx+1
   npt_glob(2) = ny+1
   DO i=1,ndims
      CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
      e(i) = s(i) + npt_loc(i) - 1
   END DO
   WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
        &  (coords(i),s(i),e(i),i=1,ndims)
 !
 !   Global mesh
   dx = 1.0d0/REAL(nx)
   dy = 1.0d0/REAL(ny)
   ALLOCATE(xgrid(0:nx))
   ALLOCATE(ygrid(0:ny))
   xgrid = [ (i*dx, i=0,nx) ]
   ygrid = [ (i*dy, i=0,ny) ]
   CALL timera(1, 'Grid_construction')
 !================================================================================
 !                        3.0  FD Laplacian
 !
   CALL timera(0, 'Laplacian')
   id=RESHAPE([ 0, -1, 0, 1, 0,  & 
                0,  0,-1, 0, 1], &
               [5,2])
   npoints = 5
   CALL init(s, e, id, .FALSE., mat, comm_cart)
 !
   CALL laplacian(dx, dy, mat)
   CALL timera(1, 'Laplacian')
 !================================================================================
 !                        4.0 Test parallel Jacobi with \nabla u(x,y) = f(x,y)
 !
 !   Problem definition
 !
   s = mat%s
   e = mat%e
   lb = s-1
   ub = e+1
   ALLOCATE(f(lb(1):ub(1),lb(2):ub(2)))       ! RHS
   ALLOCATE(v(lb(1):ub(1),lb(2):ub(2)))       ! Exact solutions
   ALLOCATE(u(lb(1):ub(1),lb(2):ub(2)))       ! Computed solutions
   ALLOCATE(resids(lb(1):ub(1),lb(2):ub(2)))  ! Residuals
   ALLOCATE(errs(lb(1):ub(1),lb(2):ub(2)))    ! Errors
   ALLOCATE(resid_it(0:nits))
   ALLOCATE(err_it(0:nits))
 !
   f(s(1):e(1),s(2):e(2)) = rhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
   v(s(1):e(1),s(2):e(2)) = exact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
   CALL exchange(comm_cart, f)
   CALL exchange(comm_cart, v)
 !
 !   Residuals of exact solutions
   resids = get_resids(mat,v,f)
   resid = norm2_vec(resids, comm_cart)
 !
 !   Jacobi iteration loop
 !
   IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error'
   u = 0.0d0
   CALL exchange(comm_cart, u)
   resids = get_resids(mat,u,f)
   errs  = u-v
   resid_it(0) = norm2_vec(resids, comm_cart)
   err_it(0)   = norm2_vec(errs, comm_cart)
   it_skip = MAX(1,nits/10)
 !
   CALL timera(0, 'Jacobi')
   DO it=1,nits
      CALL jacobi(mat, omega, 1, u, f)
      CALL exchange(comm_cart, u)
      resids = get_resids(mat,u,f)
      errs  = u-v
      resid_it(it) = norm2_vec(resids, comm_cart)
      err_it(it)   = norm2_vec(errs, comm_cart)
      IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN 
         WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),&
           &    err_it(it), err_it(it)/err_it(it-1)
      END IF
   END DO
   CALL timera(1, 'Jacobi')
 !================================================================================
 !                        9.0  Epilogue
   CALL h5file
 !
   CALL timera(9, '')
   CALL MPI_FINALIZE(ierr)
 CONTAINS
   SUBROUTINE disp(str, arr)
     CHARACTER(len=*), INTENT(in) :: str
     REAL(rkind), INTENT(in)      :: arr(:,:)
     INTEGER :: j
     WRITE(*,'(/a)') str
     DO j=1,SIZE(arr,2)
        WRITE(*,'(10f8.3)') arr(:,j)
     END DO
   END SUBROUTINE disp
 !
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_jacobi.h5'
     INTEGER :: fid
     CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
     CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
     CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
 !
     CALL putarrnd(fid, '/f',      f,     (/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/v',      v,     (/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/u',      u,     (/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/errs',   errs,  (/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/resids', resids,(/1,2/), garea=(/1,1/))
 !
     CALL putarr(fid, '/resid', resid_it, ionode=0)
     CALL putarr(fid, '/error', err_it, ionode=0)
 !
     CALL putmat(fid, '/MAT', mat)
     CALL closef(fid)
   END SUBROUTINE h5file
 !
   FUNCTION outerprod(x, y) RESULT(r)
 !
 !  outer product
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: r(SIZE(x),SIZE(y))
     INTEGER :: i, j
     DO j=1,SIZE(y)
        DO i=1,SIZE(x)
           r(i,j) = x(i)*y(j)
        END DO
     END DO
   END FUNCTION outerprod
 !
   FUNCTION rhs(x,y)
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: rhs(SIZE(x),SIZE(y))
     rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
   END FUNCTION rhs
 !
   FUNCTION exact(x,y)
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: exact(SIZE(x),SIZE(y))
     exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
   END FUNCTION exact
 END PROGRAM main
diff --git a/multigrid/src/test_jacobig.f90 b/multigrid/src/test_jacobig.f90
index 1fdca83..ff71a3e 100644
--- a/multigrid/src/test_jacobig.f90
+++ b/multigrid/src/test_jacobig.f90
@@ -1,331 +1,331 @@
 !>
 !> @file test_jacobig.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !    Test 2D  parallel Jacobi using STENCIL_2D matrix-free structure.
 !
 MODULE mod
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
 !
   LOGICAL, PARAMETER     :: nldebug=.FALSE.
   REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
 CONTAINS
 END MODULE mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 PROGRAM main
   USE mpi
   USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs
   USE pputils2, ONLY  : dist1d, timera, hostlist
   USE gvector, ONLY   : gvector_2d, ASSIGNMENT(=), OPERATOR(-)
   USE parmg, ONLY     : grid2_type, create_grid, jacobi, exchange, get_resids, norm_vec
   USE stencil, ONLY   : stencil_2d, putmat
   USE mod
   IMPLICIT NONE
 !
   INTEGER, PARAMETER :: ndims=2
 !
   INTEGER                   :: me, neighs(4), npes, ierr
   INTEGER, DIMENSION(ndims) :: dims=[0,0]
   INTEGER, DIMENSION(ndims) :: coords, comm1d
   LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
   LOGICAL                   :: reorder =.FALSE.
   INTEGER                   :: comm_cart
   INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc
 !
   REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
   INTEGER, ALLOCATABLE      :: id(:,:)
   REAL(rkind)               :: dx, dy
   INTEGER                   :: npoints ! Number of points in FD stencil
 !
   TYPE(gvector_2d)          :: v_exact, resids, errs
   REAL(rkind), ALLOCATABLE  :: resid_it(:), err_it(:)
   INTEGER, DIMENSION(ndims) :: g
   INTEGER                   :: i, it, it_skip
 !
   INTEGER :: levels=1
   TYPE(grid2_type), ALLOCATABLE :: grids(:)
 !
 !   Input quantities
 !
   CHARACTER(len=4) :: prb='dddd'
   INTEGER          :: nx=4, ny=4    ! Number of intervals
   INTEGER          :: kx=1, ky=1
   REAL(rkind)      :: Lx=1.0, Ly=1.0
   REAL(rkind)      :: icrosst=1.0, beta=0.0, miome=200.0
   REAL(rkind)      :: omega=1.0d0
   INTEGER          :: nits=100, nu=1
 !
   NAMELIST /in/ prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, &
        &        miome, omega, nits, nu
 !================================================================================
 !                        1.0  Prologue
 !
 !  2D process grid
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_dims_create(npes, ndims, dims, ierr)
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
        &               ierr)
 !
   CALL mpi_comm_rank(comm_cart, me, ierr)
   CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
   CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
   CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
 !
   CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
   CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
 !
   CALL hostlist(comm_cart)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2)
   END IF
 !
 !  Read problem inputs
 !
   IF(me.EQ.0) THEN
      READ(*,in)
      WRITE(*,in)
   END IF
   CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, comm_cart, ierr)
   CALL mpi_bcast(kx, 1, MPI_INTEGER, 0, comm_cart, ierr)
   call mpi_bcast(ky, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(icrosst, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
   CALL mpi_bcast(Lx,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
   CALL mpi_bcast(Ly,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
   CALL mpi_bcast(beta,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
   CALL mpi_bcast(miome, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(nu, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
 !================================================================================
 !                        2.0  2d Grid construction
 !
 !  Partition 2D grid
 !
   CALL timera(0, 'Grid_construction')
   npt_glob(1) = nx+1
   npt_glob(2) = ny+1
   DO i=1,ndims
      CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
      e(i) = s(i) + npt_loc(i) - 1
   END DO
   WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
        &  (coords(i),s(i),e(i),i=1,ndims)
 !
 !   Global mesh
 !
   dx = Lx/REAL(nx)
   dy = Ly/REAL(ny)
   ALLOCATE(xgrid(0:nx))
   ALLOCATE(ygrid(0:ny))
   xgrid = [ (i*dx, i=0,nx) ]
   ygrid = [ (i*dy, i=0,ny) ]
   CALL timera(1, 'Grid_construction')
 !
 !   Create grid structure
 !
   ALLOCATE(grids(levels))
   npoints = 9             ! Size of FD stencil
   ALLOCATE(id(npoints,2))
   id=RESHAPE([ 0, -1,  0,  1, -1, 1, -1, 0, 1,  & 
                0, -1, -1, -1,  0, 0,  1, 1, 1], &
               [npoints,2])
   CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart)
 !================================================================================
 !                        3.0  FD Operator
 !
   CALL timera(0, 'Laplacian')
 !
   CALL fdmat(grids(1),  fdense, icrosst, grids(1)%fdmat)
   CALL ibc_fdmat(grids(1)%fdmat, prb)
 !
   CALL timera(1, 'Laplacian')
 !================================================================================
 !                        4.0 RHS and exact solution
 !
 !   Allocate memory
 !
   s0 = grids(1)%s0; e0 = grids(1)%e0
   s  = grids(1)%s;  e  = grids(1)%e
   g = [1,1]
   v_exact = gvector_2d(s, e, g) ! Exact solutions   
   errs    = gvector_2d(s, e, g) ! Disc. errors
   resids  = gvector_2d(s, e, g) ! Residuals 
   ALLOCATE(resid_it(0:nits))
   ALLOCATE(err_it(0:nits))
 !
 !   Set RHS at the finest grid. Impose Dirichlet BC.
 !
   grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
   CALL ibc_rhs(grids(1)%f, s0, e0, prb)
 !
 !   Exact solutions
 !
   v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
 !================================================================================
 !                        5.0  Jacobi iteration loop
 !
   IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error'
   grids(1)%v = 0.0d0
   resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
   errs  = grids(1)%v - v_exact
   resid_it(0) = norm_vec(resids, comm_cart, root=0)
   err_it(0)   = norm_vec(errs, comm_cart, root=0)
   it_skip = MAX(1,nits/10)
 !
   CALL timera(0, 'Jacobi')
   DO it=1,nits
      CALL jacobi(comm_cart, grids(1)%fdmat, omega, nu, grids(1)%v, grids(1)%f)
      resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
      errs  = grids(1)%v - v_exact
      resid_it(it) = norm_vec(resids, comm_cart, root=0)
      err_it(it)   = norm_vec(errs, comm_cart, root=0)
      IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN 
         WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),&
           &    err_it(it), err_it(it)/err_it(it-1)
      END IF
   END DO
   CALL timera(1, 'Jacobi')
 !================================================================================
 !                        9.0  Epilogue
   CALL h5file
 !
   CALL timera(9, '')
   CALL MPI_FINALIZE(ierr)
 CONTAINS
 !
 !+++
   FUNCTION fdense(x)
 !
 !   Return density
 !
     REAL(rkind), INTENT(in) :: x(:)
     REAL(rkind)              :: fdense(SIZE(x))
     fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
   END FUNCTION fdense
 !+++
   FUNCTION fexact(x,y)
 !
 !   Return analytical solution
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind) :: fexact(SIZE(x),SIZE(y))
     REAL(rkind) :: c
     INTEGER :: j
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = SIN(2.0d0*pi*ky*y(j)/Ly)
           fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
        END DO
     END IF
   END FUNCTION fexact
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_jacobig.h5'
     INTEGER :: fid
     CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
     CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
     CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
 !
     CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g)
     CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g)
     CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g)
     CALL putarrnd(fid, '/errs',   errs%val,  (/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/))
 !
     CALL putarr(fid, '/resid', resid_it, ionode=0)
     CALL putarr(fid, '/error', err_it, ionode=0)
 !
     CALL putmat(fid, '/MAT', grids(1)%fdmat)
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
   FUNCTION frhs(x,y)
 !
 !   Return RHS
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind) :: frhs(SIZE(x),SIZE(y))
     REAL(rkind) :: c, s, d(SIZE(x))
     REAL(rkind) :: corr
     INTEGER :: j
     corr = 1.d0+icrosst**2/4.0d0
     d(:) = fdense(x(:))
     IF(prb.EQ.'dddd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
                &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     ELSE IF (prb.EQ.'nndd') THEN
        DO j=1,SIZE(y)
           c = COS(2.0d0*pi*ky*y(j)/Ly)
           s = SIN(2.0d0*pi*ky*y(j)/Ly)
           frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
                &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
                &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
        END DO
     END IF
   END FUNCTION frhs
 !+++!
   FUNCTION outerprod(x, y) RESULT(r)
 !
 !  outer product
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: r(SIZE(x),SIZE(y))
     INTEGER :: i, j
     DO j=1,SIZE(y)
        DO i=1,SIZE(x)
           r(i,j) = x(i)*y(j)
        END DO
     END DO
   END FUNCTION outerprod
 !
   FUNCTION rhs(x,y)
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: rhs(SIZE(x),SIZE(y))
     rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
   END FUNCTION rhs
 !
   FUNCTION exact(x,y)
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: exact(SIZE(x),SIZE(y))
     exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
   END FUNCTION exact
 END PROGRAM main
diff --git a/multigrid/src/test_mg.f90 b/multigrid/src/test_mg.f90
index 8275f7a..7a90f57 100644
--- a/multigrid/src/test_mg.f90
+++ b/multigrid/src/test_mg.f90
@@ -1,279 +1,279 @@
 !>
 !> @file test_mg.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test multigrid V-cycle
 !
   USE multigrid
   USE math_util, ONLY : root_bessj
   IMPLICIT NONE
 !
   INTEGER          :: nx=8, nidbas=1, ngauss=2, alpha=0, nits=40
   INTEGER          :: modem=22, modep=10
   INTEGER          :: levels=2, nu1=1, nu2=1, mu=1, nu0=1
   CHARACTER(len=4) :: relax='jac '
   LOGICAL          :: nlfixed = .FALSE.
   DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
   DOUBLE PRECISION :: omega=2.0d0/3.0d0
   INTEGER          :: l, nrank, its
   DOUBLE PRECISION :: errdisc_dir
   DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:)
   DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:)
   DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:)
 !
   TYPE(grid1d), ALLOCATABLE :: gridx(:)
   TYPE(mg_info)             :: info
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha, &
        &            relax, omega, nits, nlfixed, levels, nu1, nu2, mu, nu0
 !--------------------------------------------------------------------------------
 !                   1.  Prologue
 !   Inputs
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
   levels = MIN(levels, get_lmax(nx))
 !
   info%nu1 = nu1
   info%nu2 = nu2
   info%mu = mu
   info%nu0 = nu0
   info%levels = levels
   info%relax = relax
   info%omega = omega
 !
 !   Create grids
 !
   ALLOCATE(gridx(levels))
   CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
   WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
 !
 !   Create FE matrice and set BC u(0)=u(1)=0
 !
   DO l=1,levels
      CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq)
 !
 !     Left Dirichlet BC (only for Cartesian geometry)
      IF(alpha .EQ. 0) THEN
         CALL ibcmat(1, gridx(l)%mata)
      END IF
 !
 !     Right Dirichlet BC
      CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata)
 !
 !     BC on grid transfer operator
      IF(l.GT.1) THEN
         WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0
         IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0
         gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0
      END IF
   END DO
   CALL printdiag_gb('Diagonal of coarsest A', gridx(levels)%mata)
 !
 !   Construct RHS and set BC only on the finest grid
 !
   nrank = gridx(1)%rank
   CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
 !
 !     Left Dirichlet BC (only for Cartesian geometry)
   IF(alpha .EQ. 0) THEN
      gridx(1)%f(1) = 0.0d0
   END IF
 !
 !     Right Dirichlet BC
   gridx(1)%f(nrank) = 0.0d0
 !--------------------------------------------------------------------------------
 !                   2.  Direct solution
 !
   WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
   ALLOCATE(u_direct(0:nx))
   ALLOCATE(sol_direct(nrank))
   CALL direct_solve(gridx(1), sol_direct)
   CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct)
   errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol)
   WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
 !--------------------------------------------------------------------------------
 !                   3.  Solution from MG V-cycles
 !
   WRITE(*,'(//a)') 'Multigrid MG V-cycles'
   ALLOCATE(sol_calc(nrank))
   ALLOCATE(err(0:nits))
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
 !
 !   Initial guess
 !
   sol_calc(:) = 0.0d0
   IF(nlfixed) THEN
      sol_calc(:) = sol_direct(:)
   END IF
   gridx(1)%v(:) = sol_calc(:)
   err(0) = normf(gridx(1)%matm, sol_calc-sol_direct)
   errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol)
   resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
 !
 !   Iterations
 !
   DO its=1,nits
      CALL mg(gridx, info, 1)
      sol_calc(:) = gridx(1)%v(:)
      err(its) = normf(gridx(1)%matm, sol_calc-sol_direct)
      errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol)
      resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
   END DO
 !
   WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', &
        &                'disc. err', 'ratio'
   WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0)
   WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), &
        &   resid(its), resid(its)/resid(its-1), &
        &   errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits)
 !--------------------------------------------------------------------------------
 !                   4.  Solution from FMG
 !
   WRITE(*,'(//a)') 'Full Multigrid'
 
   ALLOCATE(errdisc_fmg(nits))
   DO its=1,nits
      info%nu0 = its
      CALL fmg(gridx, info, 1)
      sol_calc(:) = gridx(1)%v(:)
      errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_calc, sol)
      resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
   END DO
   WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio'
   WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), &
        &     errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits)
 !
 !   Grid values at final iteration
 !
   ALLOCATE(u_exact(0:nx))
   ALLOCATE(u_calc(0:nx))
   u_exact = sol(gridx(1)%x)
   CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc)
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   CALL h5file
 !--------------------------------------------------------------------------------
 CONTAINS
   FUNCTION rhs(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: nump
     SELECT CASE (alpha)
     CASE(0)                  ! Cartesian geometry
        rhs = SIN(pi*kmode*x)
     CASE(1)                  ! Cylindrical
        nump = root_bessj(modem, modep)
        rhs = x * nump**2 * bessel_jn(modem, nump*x)
     END SELECT
   END FUNCTION rhs
   FUNCTION sol(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sol(SIZE(x))
     DOUBLE PRECISION :: nump
     SELECT CASE (alpha)
     CASE(0)                  ! Cartesian geometry
        sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
     CASE(1)                  ! Cylindrical
        nump = root_bessj(modem, modep)
        sol(:) = bessel_jn(modem, nump*x(:))
     END SELECT
   END FUNCTION sol
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     SELECT CASE (alpha)
     CASE(0)           ! Cartesian geometry
        c(1) = 1.0d0
        idt(1) = 1
        idw(1) = 1
        c(2) = sigma
        idt(2) = 0
        idw(2) = 0
     CASE(1)           ! Cylindrical
        c(1) = x
        idt(1) = 1
        idw(1) = 1
        c(2) = REAL(modem,8)**2/x
        idt(2) = 0
        idw(2) = 0
     END SELECT
   END SUBROUTINE coefeq
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_mg.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NIDBAS', nidbas)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'KMODE', kmode)
     CALL attach(fid, '/', 'MODEM', modem)
     CALL attach(fid, '/', 'MODEP', modep)
     CALL attach(fid, '/', 'ALPHA', alpha)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'OMEGA', omega)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'NLFIXED', nlfixed)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'NU1', nu1)
     CALL attach(fid, '/', 'NU2', nu2)
     CALL attach(fid, '/', 'NU0', nu0)
     CALL attach(fid, '/', 'MU', mu)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata)
        IF(l.GT.1) THEN
           CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
           CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
           CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
        END IF
        CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
     END DO
     CALL creatg(fid, '/Iterations')
     CALL putarr(fid, '/Iterations/errors', err)
     CALL putarr(fid, '/Iterations/residues', resid)
     CALL putarr(fid, '/Iterations/disc_errors', errdisc)
     CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg)
     CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x)
     CALL putarr(fid, '/Iterations/u_direct', u_direct)
     CALL putarr(fid, '/Iterations/u_exact', u_exact)
     CALL putarr(fid, '/Iterations/u_calc', u_calc)
     CALL closef(fid)
   END SUBROUTINE h5file
 END PROGRAM main
diff --git a/multigrid/src/test_mg2d.f90 b/multigrid/src/test_mg2d.f90
index 4730a65..ffa9e6d 100644
--- a/multigrid/src/test_mg2d.f90
+++ b/multigrid/src/test_mg2d.f90
@@ -1,413 +1,413 @@
 !>
 !> @file test_mg2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test 2d multigrid: Cartesian case
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
   DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
   CHARACTER(len=4) :: prb='poly'
   INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits
   CHARACTER(len=4) :: relax='jac '
   DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol
   LOGICAL :: nlfixed=.FALSE.
   DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step
   DOUBLE PRECISION :: resid_direct, errdisc_direct
   DOUBLE PRECISION :: norma, normb
 !
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy
 !
   DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:)
   DOUBLE PRECISION, POINTER     :: sol_direct_1d(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
 !
   INTEGER :: ierr, me
   INTEGER :: l, nterms
   INTEGER :: its
 !
   TYPE(grid2d), ALLOCATABLE :: grids(:)
   TYPE(mg_info)             :: info
 !
   NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, prb, &
        &            nu1, nu2, mu, nu0, relax, nits, tol, nlfixed, omega
 !--------------------------------------------------------------------------------
 !                    1.   Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Inputs
 !
   n = (/8, 8/)
   nidbas=(/3,3/)
   ngauss=(/2,2/)
   alpha = (/0,0/)
   kx=4
   ky=3
   sigma=10.0d0
   levels=2
   prb='poly'
   nu1 = 1
   nu2 = 1
   mu = 1
   nu0 = 1
   nits = 10
   tol = 1.e-8
   relax = 'jac'
   nlfixed= .FALSE.
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
      CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
 !
 !   Adjust number of levels and fill mg info.
 !
   levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
   info%nu1 = nu1
   info%nu2 = nu2
   info%mu = mu
   info%nu0 = nu0
   info%levels = levels
   info%relax = relax
   info%omega = omega
 !
 !   Create grids
 !
   t0 = mpi_wtime()
 !
   dx = 1.0d0/REAL(n(1),8)
   dy = 1.0d0/REAL(n(2),8)
   ALLOCATE(x(0:n(1)), y(0:n(2)))
   DO ix=0,n(1)
      x(ix) = ix*dx
   END DO
   DO iy=0,n(2)
      y(iy) = iy*dy
   END DO
 !
   ALLOCATE(grids(levels))
   CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
   WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
   WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
 !
 !   Construct RHS and set BC only on the finest grid
 !
   CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
   CALL ibcrhs(grids(1), grids(1)%f)
 !
 !   Build FE matrices and set BC
 !
   nterms = 3
   DO l=1,levels
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
      CALL ibcmat(grids(l), grids(l)%mata)
      CALL to_mat(grids(l)%mata)
   END DO
 !
 !   Set BC on grid transfer matrices
 !
   CALL ibc_transf(grids,1,3)
   CALL ibc_transf(grids,2,3)
 !
   tsetup(1) = mpi_wtime()-t0
 !
 !   Clear and rebuild FE matrices and set BC
 !
   t0 = mpi_wtime()
   nterms = 2
   DO l=1,levels
      CALL clear_mat(grids(l)%mata)
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.)
      CALL ibcmat(grids(l), grids(l)%mata)
   END DO
   tsetup(2) = mpi_wtime()-t0
 !--------------------------------------------------------------------------------
 !                   1.  Analytical solution (at the finest grid, l=1)
 !
   ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
   sol_anal_grid = sol(grids(1)%x, grids(1)%y)
 !--------------------------------------------------------------------------------
 !                   2.  Direct solution (at the finest grid, l=1)
 !
   WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
 !
   ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
        &         source=grids(1)%f)
   ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
 !
   sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
 !
 !
   PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
   PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
 !
   t0 = mpi_wtime()
   sol_direct = grids(1)%f
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   tdirect = mpi_wtime()-t0
 !
   t0 = mpi_wtime()
   sol_direct = grids(1)%f
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
   errdisc_direct = disc_err(grids(1)%spl, sol_direct, sol)
 !
   tbsolve = mpi_wtime()-t0
 !
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
        &       [0,0], sol_direct)
 !
   WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
        &        errdisc_direct, resid_direct
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
 !
   WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', matnorm(grids(1)%mata)
   WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf')
   WRITE(*, '(a,1pe12.3)') '1 norm of A        ', matnorm(grids(1)%mata, '1')
 !--------------------------------------------------------------------------------
 !                   3.  Test multigrid V-cycle
 !
   WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
   ALLOCATE(sol_calc_grid(0:n(1),0:n(2)))
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
   ALLOCATE(rresid(0:nits))
 !
 !   Norm of A and b
 !
   norma = matnorm(grids(1)%mata)
   normb = NORM2(grids(1)%f1d)
 !
 !   Initial guess
 !
   IF(nlfixed) THEN
      grids(1)%v = sol_direct
   ELSE
      grids(1)%v = 0.0d0
   END IF
 !
   errdisc(0) = disc_err(grids(1)%spl, grids(1)%v, sol)
   resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
   rresid(0) = resid(0)  /  ( norma*NORM2(grids(1)%v1d) + normb )
   WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
        &                           'rel. resid', 'ratio'
   WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0)
 !
 !   Iterations
 !
   t0 = mpi_wtime()
   DO its=1,nits
      CALL mg(grids, info, 1)
      errdisc(its) = disc_err(grids(1)%spl, grids(1)%v, sol)
      resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
      rresid(its) = resid(its)  / ( norma*NORM2(grids(1)%v1d) + normb )
      WRITE(*,'((i4,3(1pe12.3,0pf8.2)))')  its, &
           &   resid(its),   resid(its)/resid(its-1), &
           &   errdisc(its), errdisc(its)/errdisc(its-1), &
           &   rresid(its),  rresid(its)/rresid(its-1)
      IF(resid(its) .LE. tol) EXIT
   END DO
   nits = MIN(nits,its)
   titer = mpi_wtime() - t0
   titer_per_step = titer/REAL(its,8)
 !
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, &
        &       [0,0], grids(1)%v)
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Display timing
 !
   WRITE(*,'(a,2(1pe12.3))') 'Set up time (s)            ', tsetup
   WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s)  ', tdirect, tbsolve
   WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s)              ', titer, nits
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
   CALL mpi_finalize(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION rhs(x, y)
 !
 ! Return problem RHS
 !
     DOUBLE PRECISION, INTENT(in) :: x, y
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: x2, y2
     SELECT CASE(TRIM(prb))
     CASE('poly')
        x2 = x*x; y2 = y*y;
        rhs = 2.d0 * ( (1.0d0-6.d0*x2)*y2*(1.d0-y2) + &
             &         (1.0d0-6.d0*y2)*x2*(1.d0-x2)  )
     CASE('trig')
        rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
     END SELECT
   END FUNCTION rhs
 !+++
   FUNCTION sol(x, y)
 !
 ! Return exact problem solution
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c
     DOUBLE PRECISION  :: x2(SIZE(x)), y2(SIZE(y))
     INTEGER :: j
     SELECT CASE(TRIM(prb))
     CASE('poly')
        x2 = x*x; y2 = y*y;
        DO j=1,SIZE(y)
           c = y2(j)*(y2(j)-1.d0)
           sol(:,j) = c*x2(:)*(1.0d0-x2(:))
        END DO
     CASE('trig')
        DO j=1,SIZE(y)
           c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
           sol(:,j) = c * SIN(PI*kx*x(:))
        END DO
     END SELECT
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(x, y, idt, idw, c)
 !
 ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
 !
     DOUBLE PRECISION, INTENT(in)  :: x, y
     INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = 1.0d0
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.0d0
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
 !
     c(3) = sigma
     idt(3,1) = 0
     idt(3,2) = 0
     idw(3,1) = 0
     idw(3,2) = 0
 
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_mg2d.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', n(1))
     CALL attach(fid, '/', 'NY', n(2))
     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
     CALL attach(fid, '/', 'KX', kx)
     CALL attach(fid, '/', 'KY', ky)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'ALPHA1', alpha(1))
     CALL attach(fid, '/', 'ALPHA2', alpha(2))
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'NU1', nu1)
     CALL attach(fid, '/', 'NU2', nu2)
     CALL attach(fid, '/', 'NU0', nu0)
     CALL attach(fid, '/', 'MU', mu)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/anal', sol_anal_grid)
     CALL putarr(fid, '/solutions/calc', sol_calc_grid)
     CALL putarr(fid, '/solutions/direct', sol_direct_grid)
 !
     CALL creatg(fid, '/Iterations')
     CALL putarr(fid, '/Iterations/residues', resid(0:nits))
     CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM
diff --git a/multigrid/src/test_mg2d_cyl.f90 b/multigrid/src/test_mg2d_cyl.f90
index 73644ec..34967dc 100644
--- a/multigrid/src/test_mg2d_cyl.f90
+++ b/multigrid/src/test_mg2d_cyl.f90
@@ -1,427 +1,427 @@
 !>
 !> @file test_mg2d_cyl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test 2d multigrid 
 !   Cylindrical case
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   INTEGER, DIMENSION(2) :: n, nidbas, ngauss
   INTEGER :: modem=22, modep=10
   CHARACTER(len=4) :: prb='poly'
   INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits
   CHARACTER(len=4) :: relax='jac '
   DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol, rtol
   LOGICAL :: nluniq=.TRUE.
   LOGICAL :: nlfixed=.FALSE.
   DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step
   DOUBLE PRECISION :: resid_direct, errdisc_direct
   DOUBLE PRECISION :: norma, normb
 !
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy
 !
   DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:)
   DOUBLE PRECISION, POINTER     :: sol_direct_1d(:)
 !
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
 !
   DOUBLE PRECISION, ALLOCATABLE :: sol_orig(:,:)
 !
   INTEGER :: ierr, me
   INTEGER :: l, nterms
   INTEGER :: its
 !
   TYPE(grid2d), ALLOCATABLE :: grids(:)
   TYPE(mg_info)             :: info
 !
   NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, &
        &            nu1, nu2, mu, nu0, relax, nits, tol, rtol, nlfixed, &
        &            nluniq, omega
 !--------------------------------------------------------------------------------
 !                    1.   Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Inputs
 !
   n = (/8, 8/)
   nidbas=(/3,3/)
   ngauss=(/2,2/)
   modem = 22
   modep = 10
   prb='poly'
   levels=2
   nu1 = 1
   nu2 = 1
   mu = 1
   nu0 = 1
   nits = 10
   tol = 1.e-8
   rtol = 1.e-10
   relax = 'jac'
   nlfixed= .FALSE.
   nluniq = .TRUE.
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
      CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
 !
 !   Adjust number of levels and fill mg info.
 !
   levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
   info%nu1 = nu1
   info%nu2 = nu2
   info%mu = mu
   info%nu0 = nu0
   info%levels = levels
   info%relax = relax
   info%omega = omega
 !
 !   Create grids
 !
   t0 = mpi_wtime()
 !
   dx = 1.0d0/REAL(n(1),8)
   dy = 2.0d0*pi/REAL(n(2),8)
   ALLOCATE(x(0:n(1)), y(0:n(2)))
   DO ix=0,n(1)
      x(ix) = ix*dx
   END DO
   DO iy=0,n(2)
      y(iy) = iy*dy
   END DO
 !
   ALLOCATE(grids(levels))
   CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
        &           debug_in=.FALSE.)
   WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
   WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
 !!$  CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1))
 !!$  CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2))
 !
 !   Construct RHS and set BC only on the finest grid
 !
   CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
 !!$  WRITE(*,'(a/(8(1pe12.3)))') 'Orig RHS at the axis',  grids(1)%f(1,1:n(2))
   CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq)
 !!$  WRITE(*,'(a/(8(1pe12.3)))') 'RHS at the axis',  grids(1)%f(1,1:n(2))
 !
 !   Build FE matrices and set BC
 !
   nterms = 2
   DO l=1,levels
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
      CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
      CALL to_mat(grids(l)%mata)
   END DO
 !
 !   Set BC on grid transfer matrices
 !
   CALL ibc_transf(grids, 1, 2)   ! Only right boundary on r (1st dim.)
   tsetup(1) = mpi_wtime()-t0
 !
 !   Clear and rebuild FE matrices and set BC
 !
   t0 = mpi_wtime()
   nterms = 2
   DO l=1,levels
      CALL clear_mat(grids(l)%mata)
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.)
      CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
   END DO
   tsetup(2) = mpi_wtime()-t0
 !--------------------------------------------------------------------------------
 !                   1.  Analytical solution (at the finest grid, l=1)
 !
   ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
   sol_anal_grid = sol(grids(1)%x, grids(1)%y)
 !--------------------------------------------------------------------------------
 !                   2.  Direct solution (at the finest grid, l=1)
 !
   WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
 !
   ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
        &         source=grids(1)%f)
   ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
   ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
 !
   sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
 !
 !
   PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
   PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
 !
   t0 = mpi_wtime()
   sol_direct = grids(1)%f
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   tdirect = mpi_wtime()-t0
 !
   t0 = mpi_wtime()
   sol_direct = grids(1)%f
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
 !
   sol_direct_orig = sol_direct
   CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq)
   errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol)
 !
   tbsolve = mpi_wtime()-t0
 !
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
        &       [0,0], sol_direct_orig)
 !
   WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
        &        errdisc_direct, resid_direct
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
 !--------------------------------------------------------------------------------
 !                   3.  Test multigrid V-cycle
 !
   WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
   ALLOCATE(sol_calc_grid(0:n(1),0:n(2)))
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
   ALLOCATE(rresid(0:nits))
   ALLOCATE(sol_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
 !
 !   Norm of A and b
 !
   norma = matnorm(grids(1)%mata)
   normb = NORM2(grids(1)%f1d)
 !
   WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', norma
   WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf')
   WRITE(*, '(a,1pe12.3)') '1 norm of A        ', matnorm(grids(1)%mata, '1')
 !
 !   Initial guess
 !
   IF(nlfixed) THEN
      grids(1)%v = sol_direct
   ELSE
      grids(1)%v = 0.0d0
   END IF
 !
   sol_orig(:,:) = grids(1)%v(:,:)
   CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq)
   errdisc(0) = disc_err(grids(1)%spl, sol_orig, sol)
 !
   resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
   rresid(0) = resid(0)  /  ( norma*NORM2(grids(1)%v1d) + normb )
   WRITE(*,'(a4,3(a12,a8),a12)') 'its', 'residue', 'ratio', 'disc. err', &
        &         'ratio', 'rel. resid', 'ratio', '||v||'
   WRITE(*,'(i4,3(1pe12.3,8x),1pe12.3)') 0, resid(0), errdisc(0), rresid(0), NORM2(grids(1)%v1d)
 !
 !   Iterations
 !
   t0 = mpi_wtime()
   DO its=1,nits
      CALL mg_cyl(grids, info, 1, nluniq_in=nluniq)
      resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
      rresid(its) = resid(its)  / ( norma*NORM2(grids(1)%v1d) + normb )
 !
      sol_orig(:,:) = grids(1)%v(:,:)
      CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq)
      errdisc(its) = disc_err(grids(1)%spl, sol_orig, sol)
 !
      WRITE(*,'((i4,3(1pe12.3,0pf8.2)),1pe12.3)')  its, &
           &   resid(its),   resid(its)/resid(its-1), &
           &   errdisc(its), errdisc(its)/errdisc(its-1), &
           &   rresid(its),  rresid(its)/rresid(its-1), &
           &   NORM2(grids(1)%v1d)
      IF(resid(its) .LE. tol .OR. rresid(its).LE. rtol ) EXIT
   END DO
   nits = MIN(nits,its)
   titer = mpi_wtime() - t0
   titer_per_step = titer/REAL(its,8)
 !
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, &
        &       [0,0], sol_orig)
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Display timing
 !
   WRITE(*,'(a,2(1pe12.3))') 'Set up time (s)            ', tsetup
   WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s)  ', tdirect, tbsolve
   WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s)              ', titer, nits
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
   CALL mpi_finalize(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION rhs(r, theta)
 !
 ! Return problem RHS
 !
     USE math_util, ONLY : root_bessj
     DOUBLE PRECISION, INTENT(in) :: r, theta
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: nump
 !
     SELECT CASE(TRIM(prb))
     CASE('poly')
        rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
     CASE('bess')
        nump = root_bessj(modem, modep)
        rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
     END SELECT
   END FUNCTION rhs
 !+++
   FUNCTION sol(r, theta)
 !
 ! Return exact problem solution
 !
     USE math_util, ONLY : root_bessj
     DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
     DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
     DOUBLE PRECISION :: nump
     INTEGER :: j
 !
     SELECT CASE(TRIM(prb))
     CASE('poly')
        DO j=1,SIZE(theta)
           sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
        END DO
     CASE('bess')
        nump = root_bessj(modem, modep)
        DO j=1,SIZE(theta)
           sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
        END DO
     END SELECT
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(r, theta, idt, idw, c)
 !
 ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
 !
     DOUBLE PRECISION, INTENT(in)  :: r, theta
     INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = r
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.0d0/r
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_mg2d_cyl.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', n(1))
     CALL attach(fid, '/', 'NY', n(2))
     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
     CALL attach(fid, '/', 'MODEM', modem)
     CALL attach(fid, '/', 'MODEP', modep)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'NU1', nu1)
     CALL attach(fid, '/', 'NU2', nu2)
     CALL attach(fid, '/', 'NU0', nu0)
     CALL attach(fid, '/', 'MU', mu)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/anal', sol_anal_grid)
     CALL putarr(fid, '/solutions/calc', sol_calc_grid)
     CALL putarr(fid, '/solutions/direct', sol_direct_grid)
 !
     CALL creatg(fid, '/Iterations')
     CALL putarr(fid, '/Iterations/residues', resid(0:nits))
     CALL putarr(fid, '/Iterations/rresidues', rresid(0:nits))
     CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM
diff --git a/multigrid/src/test_mgp.f90 b/multigrid/src/test_mgp.f90
index 26f3f24..c09190d 100644
--- a/multigrid/src/test_mgp.f90
+++ b/multigrid/src/test_mgp.f90
@@ -1,242 +1,242 @@
 !>
 !> @file test_mgp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test multigrid V-cycle for periodic problems
 !
   USE multigrid
   IMPLICIT NONE
 !
   INTEGER          :: nx=8, nidbas=1, ngauss=2, nits=40
   INTEGER          :: levels=2, nu1=1, nu2=1, mu=1, nu0=1
   CHARACTER(len=4) :: relax='jac '
   LOGICAL          :: nlfixed = .FALSE.
   DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
   DOUBLE PRECISION :: omega=2.0d0/3.0d0
   INTEGER          :: l, nrank, dim, its
   DOUBLE PRECISION :: errdisc_dir
   DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:), sol_grid(:)
   DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:)
   DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:)
 !
   TYPE(grid1d), ALLOCATABLE :: gridx(:)
   TYPE(mg_info)             :: info
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, &
        &            relax, nits, nlfixed, levels, nu1, nu2, mu, nu0
 !--------------------------------------------------------------------------------
 !                   1.  Prologue
 !   Inputs
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
   levels = MIN(levels, get_lmax(nx))
 !
   info%nu1 = nu1
   info%nu2 = nu2
   info%mu = mu
   info%nu0 = nu0
   info%levels = levels
   info%relax = relax
   info%omega = omega
 !
 !   Create grids
 !
   ALLOCATE(gridx(levels))
   CALL create_grid(nx, nidbas, ngauss, 0, gridx, period=.TRUE.)
   WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
 !
 !   Create FE matrice and set BC u(0)=u(1)=0
 !
   DO l=1,levels
      CALL femat(gridx(l)%spl, gridx(l)%matap, coefeq)
   END DO
 !
 !   Construct RHS only on the finest grid
 !
   nrank = gridx(1)%rank    !  Rank of the system (number of unknowns)
   dim = nrank+nidbas       !  Dimension of Splines space
   CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
 !--------------------------------------------------------------------------------
 !                   2.  Direct solution
 !
   WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
   ALLOCATE(u_direct(0:nx))
   ALLOCATE(sol_direct(nrank))
   ALLOCATE(sol_grid(dim))      ! Required by GRIDVAL
 !
   CALL direct_solve(gridx(1), sol_direct)
   sol_grid(1:nrank) = sol_direct(1:nrank)
   sol_grid(nrank+1:dim) = sol_direct(1:nidbas)
   CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_grid)
 !
   errdisc_dir = disc_err(gridx(1)%spl, sol_grid, sol)
   WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
 !--------------------------------------------------------------------------------
 !                   3.  Solution from MG V-cycles
 !
   WRITE(*,'(//a)') 'Multigrid MG V-cycles'
   ALLOCATE(sol_calc(nrank))
   ALLOCATE(err(0:nits))
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
 !
 !   Initial guess
 !
   sol_calc(:) = 0.0d0
   sol_grid(:) = 0.0d0
   IF(nlfixed) THEN
      sol_calc(:) = sol_direct(:)
      sol_grid(1:nrank) = sol_calc(1:nrank)
      sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
   END IF
   gridx(1)%v(:) = sol_calc(:)
   err(0) = normf(gridx(1)%matmp, sol_calc-sol_direct)
   errdisc(0) = disc_err(gridx(1)%spl, sol_grid, sol)
   resid(0) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
 !
 !   Iterations
 !
   DO its=1,nits
 !
      CALL mg(gridx, info, 1)
      sol_calc(:) = gridx(1)%v(:)
      sol_grid(1:nrank) = sol_calc(1:nrank)
      sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
 !
      err(its) = normf(gridx(1)%matmp, sol_calc-sol_direct)
      errdisc(its) = disc_err(gridx(1)%spl, sol_grid, sol)  ! will call GRIDVAL
      resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
   END DO
 !
   WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', &
        &                'disc. err', 'ratio'
   WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0)
   WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), &
        &   resid(its), resid(its)/resid(its-1), &
        &   errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits)
 !--------------------------------------------------------------------------------
 !                   4.  Solution from FMG
 !
   WRITE(*,'(//a)') 'Full Multigrid'
 
   ALLOCATE(errdisc_fmg(nits))
   DO its=1,nits
      info%nu0 = its
 !
      CALL fmg(gridx, info, 1)
      sol_calc(:) = gridx(1)%v(:)
      sol_grid(1:nrank) = sol_calc(1:nrank)
      sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
 !
      errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_grid, sol)  ! will call GRIDVAL
      resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
   END DO
   WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio'
   WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), &
        &     errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits)
 !
 !   Grid values at final iteration
 !
   ALLOCATE(u_exact(0:nx))
   ALLOCATE(u_calc(0:nx))
   u_exact = sol(gridx(1)%x)
   CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_grid)
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   CALL h5file
 !--------------------------------------------------------------------------------
 CONTAINS
   FUNCTION rhs(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: rhs
     rhs = SIN(pi*kmode*x)
   END FUNCTION rhs
   FUNCTION sol(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sol(SIZE(x))
     sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
   END FUNCTION sol
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
        c(1) = 1.0d0
        idt(1) = 1
        idw(1) = 1
        c(2) = sigma
        idt(2) = 0
        idw(2) = 0
   END SUBROUTINE coefeq
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_mgp.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NIDBAS', nidbas)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'KMODE', kmode)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'NLFIXED', nlfixed)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'NU1', nu1)
     CALL attach(fid, '/', 'NU2', nu2)
     CALL attach(fid, '/', 'NU0', nu0)
     CALL attach(fid, '/', 'MU', mu)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putarr(fid, TRIM(dsname)//'/mata', gridx(l)%matap%val)
        IF(l.GT.1) THEN
           CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
           CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
           CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
        END IF
        CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
     END DO
     CALL creatg(fid, '/Iterations')
     CALL putarr(fid, '/Iterations/errors', err)
     CALL putarr(fid, '/Iterations/residues', resid)
     CALL putarr(fid, '/Iterations/disc_errors', errdisc)
     CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg)
     CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x)
     CALL putarr(fid, '/Iterations/u_direct', u_direct)
     CALL putarr(fid, '/Iterations/u_exact', u_exact)
     CALL putarr(fid, '/Iterations/u_calc', u_calc)
     CALL closef(fid)
   END SUBROUTINE h5file
 END PROGRAM main
diff --git a/multigrid/src/test_relax.f90 b/multigrid/src/test_relax.f90
index 9d00c66..c9de563 100644
--- a/multigrid/src/test_relax.f90
+++ b/multigrid/src/test_relax.f90
@@ -1,227 +1,227 @@
 !>
 !> @file test_relax.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test different relaxations
 !
   USE multigrid
   USE math_util, ONLY : root_bessj
   IMPLICIT NONE
 !
   INTEGER          :: nx=8, nidbas=1, alpha=0, nits=40
   INTEGER          :: modem=22, modep=10
   CHARACTER(len=4) :: relax='jac '
   LOGICAL          :: nlfixed = .FALSE.
   DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
   DOUBLE PRECISION :: omega=2.0d0/3.0d0
   INTEGER          :: ngauss, i, nrank, its
   DOUBLE PRECISION, ALLOCATABLE :: u_exact(:), u_direct(:), u_calc(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_calc(:), err(:), resid(:), errdisc(:)
   DOUBLE PRECISION :: errdisc_dir
 !
   TYPE(grid1d) :: gridx(1)
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, &
        &            alpha, relax, omega, nits, nlfixed
 !--------------------------------------------------------------------------------
 !                   1.  Prologue: read input, construct matrix and RHS
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Set grid
 !
   CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
 !
 !   Create FE matrice and set BC u(0)=u(1)=0
 !
   CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq)
   nrank = gridx(1)%rank
 !
 !     Left Dirichlet BC (only for Cartesian geometry)
   IF(alpha .EQ. 0) THEN
      CALL ibcmat(1, gridx(1)%mata)
   END IF
 !
 !     Right Dirichlet BC
   CALL ibcmat(nrank, gridx(1)%mata)
 !
 !   Construct RHS
 !
   CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
 !
 !     Left Dirichlet BC (only for Cartesian geometry)
   IF(alpha .EQ. 0) THEN
      gridx(1)%f(1) = 0.0d0
   END IF
 !
 !     Right Dirichlet BC
   gridx(1)%f(nrank) = 0.0d0
 !--------------------------------------------------------------------------------
 !                   2.  Direct solution
 !
 !   Direct solutions
 !
   ALLOCATE(sol_direct(nrank))
   CALL direct_solve(gridx(1), sol_direct)
 !
 !  Grid values
 !
   ALLOCATE(u_exact(0:nx))
   ALLOCATE(u_direct(0:nx))
   ALLOCATE(u_calc(0:nx))
 !
   u_exact = sol(gridx(1)%x)
   CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct)
   errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol)
   WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
 !--------------------------------------------------------------------------------
 !                   3.  Relaxation
 !
   ALLOCATE(sol_calc(nrank))
   ALLOCATE(err(0:nits))
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
 !
 !   Initial guess
   sol_calc(:) = 0.0d0
   IF(nlfixed) THEN
      sol_calc(:) = sol_direct(:)
   END IF
   err(0) = normf(gridx(1)%matm, sol_calc-sol_direct)
   errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol)
   resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
 !
 !   Iterations
   DO its=1,nits
      SELECT CASE (TRIM(relax))
      CASE('jac')
         CALL jacobi(gridx(1)%mata, omega, 1, sol_calc, gridx(1)%f)
      CASE('gs')
         CALL gs(gridx(1)%mata, 1, sol_calc, gridx(1)%f)
      END SELECT
      err(its) = normf(gridx(1)%matm, sol_calc-sol_direct)
      errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol)
      resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
   END DO
   CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc)
 !
   WRITE(*,'(/a4,3a12)') 'its', 'error', 'residue', 'disc. err'
   WRITE(*,'(i4,3(1pe12.3))') 0, err(0), resid(0), errdisc(0)
   WRITE(*,'((i4,6(1pe12.3)))') (its, err(its), resid(its), errdisc(its), &
        &   err(its)/err(its-1), resid(its)/resid(its-1), &
        &   errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
 !
   CALL h5file
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION rhs(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: nump
     SELECT CASE (alpha)
     CASE(0)                  ! Cartesian geometry
        rhs = SIN(pi*kmode*x)
     CASE(1)                  ! Cylindrical
        nump = root_bessj(modem, modep)
        rhs = x * nump**2 * bessel_jn(modem, nump*x)
     END SELECT
   END FUNCTION rhs
 !+++
   FUNCTION sol(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sol(SIZE(x))
     DOUBLE PRECISION :: nump
     SELECT CASE (alpha)
     CASE(0)                  ! Cartesian geometry
        sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
     CASE(1)                  ! Cylindrical
        nump = root_bessj(modem, modep)
        sol(:) = bessel_jn(modem, nump*x(:))
     END SELECT
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     SELECT CASE (alpha)
     CASE(0)           ! Cartesian geometry
        c(1) = 1.0d0
        idt(1) = 1
        idw(1) = 1
        c(2) = sigma
        idt(2) = 0
        idw(2) = 0
     CASE(1)           ! Cylindrical
        c(1) = x
        idt(1) = 1
        idw(1) = 1
        c(2) = REAL(modem,8)**2/x
        idt(2) = 0
        idw(2) = 0
     END SELECT
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_relax.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NIDBAS', nidbas)
     CALL attach(fid, '/', 'KX', kmode)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'ALPHA', alpha)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'OMEGA', omega)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'MODEM', modem)
     CALL attach(fid, '/', 'MODEP', modep)
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', gridx(1)%x)
     CALL putarr(fid, '/solutions/direct', u_direct)
     CALL putarr(fid, '/solutions/anal', u_exact)
     CALL putarr(fid, '/solutions/calc', u_calc)
 !
     CALL creatg(fid, '/relaxation')
     CALL putarr(fid, '/relaxation/errdisc', errdisc)
     CALL putarr(fid, '/relaxation/resid', resid)
 !
 !  Store FE matrix
 !
     CALL putmat(fid, '/MATA', gridx(1)%mata)
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM main
diff --git a/multigrid/src/test_relax2d.f90 b/multigrid/src/test_relax2d.f90
index 09583c5..932dfd5 100644
--- a/multigrid/src/test_relax2d.f90
+++ b/multigrid/src/test_relax2d.f90
@@ -1,334 +1,334 @@
 !>
 !> @file test_relax2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test 2d direcxt solve and relaxation methods
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
   DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
   INTEGER :: levels=1, nits=1000
   CHARACTER(len=4) :: relax='jac '
   DOUBLE PRECISION :: omega=2.0d0/3.0d0
   DOUBLE PRECISION :: t0
 !
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:)
   DOUBLE PRECISION, POINTER     :: sol_direct_1d(:), sol_relax_1d(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:)
 !
   INTEGER :: ierr, me
   INTEGER :: l, nterms
   INTEGER :: its
 !
   TYPE(grid2d), ALLOCATABLE :: grids(:)
 !
   NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, nits, relax
 !--------------------------------------------------------------------------------
 !                    1.   Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Inputs
 !
   n = (/8, 8/)
   nidbas=(/3,3/)
   ngauss=(/2,2/)
   alpha = (/0,0/)
   kx=4
   ky=3
   sigma=10.0d0
   levels=2
   relax='jac'
   nits=100
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
      CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
 !
 !   Adjust number of levels
 !
   levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
 !
 !   Create grids
 !
   dx = 1.0d0/REAL(n(1),8)
   dy = 1.0d0/REAL(n(2),8)
   ALLOCATE(x(0:n(1)), y(0:n(2)))
   DO ix=0,n(1)
      x(ix) = ix*dx
   END DO
   DO iy=0,n(2)
      y(iy) = iy*dy
   END DO
 !
   ALLOCATE(grids(levels))
   CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
   WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
   WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
 !
 !   Construct RHS and set BC only on the finest grid
 !
   CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
   CALL ibcrhs(grids(1), grids(1)%f)
 !!$  CALL printmat('** RHS **', grids(1)%f)
 !
 !   Build FE matrices and set BC
 !
   nterms = 3
   DO l=1,levels
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
      CALL ibcmat(grids(l), grids(l)%mata)
      CALL to_mat(grids(l)%mata)
   END DO
 !--------------------------------------------------------------------------------
 !                   1.  Direct solution (at the finest grid, l=1)
 !
   WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
   ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
        &         source=grids(1)%f)
   sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
   PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
   PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
 !
   t0 = mpi_wtime()
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0
 !
   sol_direct = grids(1)%f
   t0 = mpi_wtime()
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0
 !
   ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
   ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
        &       [0,0], sol_direct)
   sol_anal_grid = sol(grids(1)%x, grids(1)%y)
   WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
        &        disc_err(grids(1)%spl, sol_direct, sol), &
        &        residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
 !--------------------------------------------------------------------------------
 !                   2.  Relaxation (at the finest grid, l=1)
 !
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
   ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
   sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax
 !
   sol_relax_1d = 0.0d0
   errdisc(0) = disc_err(grids(1)%spl, sol_relax, sol)
   resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
   t0 = mpi_wtime()
   DO its=1,nits
      SELECT CASE (TRIM(relax))
      CASE('jac')
         CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d)
      CASE('gs')
         CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d)
      END SELECT
      errdisc(its) = disc_err(grids(1)%spl, sol_relax, sol)
      resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
   END DO
   WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8)
 !
   WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err'
   WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0)
   WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), &
        &   resid(its)/resid(its-1), &
        &   errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
   CALL mpi_finalize(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION rhs(x, y)
 !
 ! Return problem RHS
 !
     DOUBLE PRECISION, INTENT(in) :: x, y
     DOUBLE PRECISION :: rhs
     rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
   END FUNCTION rhs
 !+++
   FUNCTION sol(x, y)
 !
 ! Return exact problem solution
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c
     INTEGER :: j
     DO j=1,SIZE(y)
        c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
        sol(:,j) = c * SIN(PI*kx*x(:))
     END DO
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(x, y, idt, idw, c)
 !
 ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
 !
     DOUBLE PRECISION, INTENT(in)  :: x, y
     INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = 1.0d0
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.0d0
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
 !
     c(3) = sigma
     idt(3,1) = 0
     idt(3,2) = 0
     idw(3,1) = 0
     idw(3,2) = 0
 
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_relax2d.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', n(1))
     CALL attach(fid, '/', 'NY', n(2))
     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
     CALL attach(fid, '/', 'KX', kx)
     CALL attach(fid, '/', 'KY', ky)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'ALPHA1', alpha(1))
     CALL attach(fid, '/', 'ALPHA2', alpha(2))
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'NITS', nits)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/direct', sol_direct_grid)
     CALL putarr(fid, '/solutions/anal', sol_anal_grid)
 !
     CALL creatg(fid, '/relaxation')
     CALL putarr(fid, '/relaxation/errdisc', errdisc)
     CALL putarr(fid, '/relaxation/resid', resid)
 !
     IF(ALLOCATED(grids(1)%mata%mumps)) THEN
        CALL myputmat(fid, '/MUMPS', grids(1)%mata%mumps)
     END IF
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
   SUBROUTINE myputmat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(mumps_mat)                        :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
     CHARACTER(len=128) :: mumps_grp
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%mumps_par%JCN_loc)
     CALL putarr(fid, TRIM(label)//'/val', mat%mumps_par%A_loc)
 !
     mumps_grp = TRIM(label)//'/mumps_par'
     CALL creatg(fid, mumps_grp)
     CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
     CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
     CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN_loc)
 !
   END SUBROUTINE myputmat
 END PROGRAM
diff --git a/multigrid/src/test_relax2d_cyl.f90 b/multigrid/src/test_relax2d_cyl.f90
index c02b199..d26d308 100644
--- a/multigrid/src/test_relax2d_cyl.f90
+++ b/multigrid/src/test_relax2d_cyl.f90
@@ -1,369 +1,369 @@
 !>
 !> @file test_relax2d_cyl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test 2d direcxt solve and relaxation methods
 !   Cylindrical case
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   INTEGER, DIMENSION(2) :: n, nidbas, ngauss
   INTEGER :: modem=22, modep=10
   INTEGER :: levels=1, nits=1000
   CHARACTER(len=4) :: relax='jac ', prb='poly'
   DOUBLE PRECISION :: omega=2.0d0/3.0d0
   LOGICAL :: nluniq=.TRUE.
   LOGICAL :: nlfixed=.FALSE.
   DOUBLE PRECISION :: t0
   DOUBLE PRECISION :: resid_direct, errdisc_direct
 !
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:)
   DOUBLE PRECISION, POINTER     :: sol_direct_1d(:), sol_relax_1d(:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:), sol_relax_orig(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_relax_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:)
 !
   INTEGER :: ierr, me
   INTEGER :: l, nterms, j
   INTEGER :: its
 !
   TYPE(grid2d), ALLOCATABLE :: grids(:)
 !
   NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, omega, nits, &
        &            relax, prb, nlfixed, nluniq
 !--------------------------------------------------------------------------------
 !                    1.   Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Inputs
 !
   n = (/8, 8/)
   nidbas=(/3,3/)
   ngauss=(/2,2/)
   modem = 22
   modep = 10
   levels=2
   relax='jac'
   prb='poly'
   nits=100
   nluniq = .TRUE.
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
      CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
 !
 !   Adjust number of levels
 !
   levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
 !
 !   Create grids
 !
   dx = 1.0d0/REAL(n(1),8)
   dy = 2.0d0*pi/REAL(n(2),8)
   ALLOCATE(x(0:n(1)), y(0:n(2)))
   DO ix=0,n(1)
      x(ix) = ix*dx
   END DO
   DO iy=0,n(2)
      y(iy) = iy*dy
   END DO
 !
   ALLOCATE(grids(levels))
   CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
        &         debug_in=.FALSE.)
   WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
   WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
 !
 !   Construct RHS and set BC only on the finest grid
 !
   CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
   CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq)
 !!$  CALL printmat('** RHS **', grids(1)%f)
 !
 !   Build FE matrices and set BC
 !
   nterms = 2
   DO l=1,levels
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
      CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
      CALL to_mat(grids(l)%mata)
   END DO
 !--------------------------------------------------------------------------------
 !                   1.  Direct solution (at the finest grid, l=1)
 !
   WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
 !
   ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
        &         source=grids(1)%f)
   sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
 !
   ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
 !
   PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
   PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
 !
   t0 = mpi_wtime()
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0
 !
   sol_direct = grids(1)%f
   t0 = mpi_wtime()
   CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
   resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
 !
   sol_direct_orig = sol_direct
   CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq)
   errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol)
 !
   WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0
 !
   ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
   ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
 !
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
        &       [0,0], sol_direct_orig)
 !
   sol_anal_grid = sol(grids(1)%x, grids(1)%y)
   WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
        &        errdisc_direct, resid_direct
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
 !--------------------------------------------------------------------------------
 !                   2.  Relaxation (at the finest grid, l=1)
 !
   ALLOCATE(errdisc(0:nits))
   ALLOCATE(resid(0:nits))
   ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
   ALLOCATE(sol_relax_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
   sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax
 !
 !   Initial guess
 !
   IF(nlfixed) THEN
      sol_relax = sol_direct    ! Test fixed point\
   ELSE
      sol_relax = 0.0d0
   END IF
 !
   sol_relax_orig = sol_relax
   CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq)
   errdisc(0) = disc_err(grids(1)%spl, sol_relax_orig, sol)
   resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
 !
   t0 = mpi_wtime()
   DO its=1,nits
      SELECT CASE (TRIM(relax))
      CASE('jac')
         CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d)
      CASE('gs')
         CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d)
      END SELECT
      resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
 !
      sol_relax_orig = sol_relax
      CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq)
 !
      errdisc(its) = disc_err(grids(1)%spl, sol_relax_orig, sol)
   END DO
   WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8)
 !
   ALLOCATE(sol_relax_grid(0:n(1),0:n(2)))
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_relax_grid, &
        &       [0,0], sol_relax_orig)
 !
   WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err'
   WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0)
   WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), &
        &   resid(its)/resid(its-1), &
        &   errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
   CALL mpi_finalize(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
 !+++
   FUNCTION rhs(r, theta)
 !
 ! Return problem RHS
 !
     USE math_util, ONLY : root_bessj
     DOUBLE PRECISION, INTENT(in) :: r, theta
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: nump
 !
     SELECT CASE(TRIM(prb))
     CASE('poly')
        rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
     CASE('bess')
        nump = root_bessj(modem, modep)
        rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
     END SELECT
   END FUNCTION rhs
 !+++
   FUNCTION sol(r, theta)
 !
 ! Return exact problem solution
 !
     USE math_util, ONLY : root_bessj
     DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
     DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
     DOUBLE PRECISION :: nump
     INTEGER :: j
 !
     SELECT CASE(TRIM(prb))
     CASE('poly')
        DO j=1,SIZE(theta)
           sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
        END DO
     CASE('bess')
        nump = root_bessj(modem, modep)
        DO j=1,SIZE(theta)
           sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
        END DO
     END SELECT
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(r, theta, idt, idw, c)
 !
 ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
 !
     DOUBLE PRECISION, INTENT(in)  :: r, theta
     INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = r
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.0d0/r
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_relax2d_cyl.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', n(1))
     CALL attach(fid, '/', 'NY', n(2))
     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
     CALL attach(fid, '/', 'MODEM', modem)
     CALL attach(fid, '/', 'MODEP', modep)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL attach(fid, '/', 'RELAX', relax)
     CALL attach(fid, '/', 'NITS', nits)
     CALL attach(fid, '/', 'OMEGA', omega)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/direct', sol_direct_grid)
     CALL putarr(fid, '/solutions/relax', sol_relax_grid)
     CALL putarr(fid, '/solutions/anal', sol_anal_grid)
 !
     CALL creatg(fid, '/relaxation')
     CALL putarr(fid, '/relaxation/errdisc', errdisc)
     CALL putarr(fid, '/relaxation/resid', resid)
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM
diff --git a/multigrid/src/test_stencil.f90 b/multigrid/src/test_stencil.f90
index e1a0dc0..934af01 100644
--- a/multigrid/src/test_stencil.f90
+++ b/multigrid/src/test_stencil.f90
@@ -1,238 +1,238 @@
 !>
 !> @file test_stencil.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE mod
   USE iso_fortran_env, ONLY : real64
   IMPLICIT NONE
 !
   INTEGER, PARAMETER     :: rkind = real64
   LOGICAL, PARAMETER     :: nldebug=.FALSE.
   REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
 CONTAINS
 END MODULE mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 PROGRAM main
   USE mpi
   USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2
   USE stencil, ONLY  : stencil_2d, init, laplacian, vmx, putmat
   USE mod
   IMPLICIT NONE
 !
   INTEGER, PARAMETER :: ndims=2
 !
   INTEGER                   :: me, neighs(4), npes, ierr
   INTEGER, DIMENSION(ndims) :: dims=[0,0]
   INTEGER, DIMENSION(ndims) :: coords, comm1d
   LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
   LOGICAL                   :: reorder =.FALSE.
   INTEGER                   :: comm_cart
 !
   INTEGER                   :: nx=4, ny=4    ! Number of intervals
   INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc
 !
   REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
   REAL(rkind), ALLOCATABLE  :: arr(:,:), fexact(:,:)
   REAL(rkind), ALLOCATABLE  :: barr1(:,:), barr2(:,:), barr3(:,:)
   REAL(rkind)               :: dx, dy
   REAL(rkind)               :: err
   INTEGER, DIMENSION(5,2)   :: id       ! 5-point stencil 
   INTEGER                   :: npoints
   TYPE(stencil_2d)          :: mat
   INTEGER                   :: i, j
 !
   NAMELIST /in/ nx, ny
 !================================================================================
 !                        1.0  Prologue
 !
 !  2D process grid
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_dims_create(npes, ndims, dims, ierr)
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
        &               ierr)
 !
   CALL mpi_comm_rank(comm_cart, me, ierr)
   CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
   CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
   CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
 !
   CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
   CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
 !
 !  Read problem inputs
   IF(me.EQ.0) THEN
      READ(*,in)
      WRITE(*,in)
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
 !================================================================================
 !                        2.0  2d Grid construction
 !
 !  Partition 2D grid
   npt_glob(1) = nx+1
   npt_glob(2) = ny+1
   DO i=1,ndims
      CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
      e(i) = s(i) + npt_loc(i) - 1
   END DO
   WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
        &  (coords(i),s(i),e(i),i=1,ndims)
 !
 !   Global mesh
   dx = 1.0d0/REAL(nx)
   dy = 1.0d0/REAL(ny)
   ALLOCATE(xgrid(0:nx))
   ALLOCATE(ygrid(0:ny))
   xgrid = [ (i*dx, i=0,nx) ]
   ygrid = [ (i*dy, i=0,ny) ]
 !================================================================================
 !                        3.0  FD Laplacian
 !
   id=RESHAPE([ 0, -1, 0, 1, 0,  & 
                0,  0,-1, 0, 1], &
               [5,2])
   npoints = 5
   CALL init(s, e, id, .FALSE., mat, comm_cart)
 !
   CALL laplacian(dx, dy, mat)
 !================================================================================
 !                       4.0 Check matrice-vector product
 !
 !  Local arrays with ghost cells
   lb = mat%s-1
   ub = mat%e+1
   ALLOCATE(arr(lb(1):ub(1),lb(2):ub(2)))
   ALLOCATE(fexact(lb(1):ub(1),lb(2):ub(2)))
   ALLOCATE(barr1(lb(1):ub(1),lb(2):ub(2)))
   ALLOCATE(barr2(lb(1):ub(1),lb(2):ub(2)))
   ALLOCATE(barr3(lb(1):ub(1),lb(2):ub(2)))
 !
 !   Constant vector => Laplacian = 0
   barr1 = 0
   arr = 1.0
   barr1 = vmx(mat,arr)
   IF(mat%s(1).EQ.0)  barr1(0,:)  = 0.0  ! discard boundary values
   IF(mat%e(1).EQ.nx) barr1(nx,:) = 0.0
   IF(mat%s(2).EQ.0)  barr1(:,0)  = 0.0
   IF(mat%e(2).EQ.ny) barr1(:,ny) = 0.0
   err = norm2_vec(barr1,comm_cart,root=0,garea=[1,1])
   IF(me.EQ.0) THEN
      WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err
   END IF
 !
 !  Bilinear vector => Laplacian = 0
   arr =0.0d0
   barr2=0.0d0
   DO j=mat%s(2),mat%e(2)
      DO i=mat%s(1),mat%e(1)
         arr(i,j) = xgrid(i)*ygrid(j)
      END DO
   END DO
   CALL exchange(comm_cart, arr)
   barr2 = vmx(mat,arr)
   IF(mat%s(1).EQ.0)  barr2(0,:)  = 0.0  ! discard boundary values
   IF(mat%e(1).EQ.nx) barr2(nx,:) = 0.0
   IF(mat%s(2).EQ.0)  barr2(:,0)  = 0.0
   IF(mat%e(2).EQ.ny) barr2(:,ny) = 0.0
   err = norm2_vec(barr2, comm_cart,root=0,garea=[1,1])
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err
   END IF
 !
 !  Biquadratic vector => Laplacian = fexact
   DO j=mat%s(2),mat%e(2)
      DO i=mat%s(1),mat%e(1)
         arr(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0
         fexact(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0
      END DO
   END DO
   CALL exchange(comm_cart, arr)
   CALL exchange(comm_cart, fexact)
   barr3 = vmx(mat,arr) - fexact
   IF(mat%s(1).EQ.0)  barr3(0,:)  = 0.0  ! discard boundary values
   IF(mat%e(1).EQ.nx) barr3(nx,:) = 0.0
   IF(mat%s(2).EQ.0)  barr3(:,0)  = 0.0
   IF(mat%e(2).EQ.ny) barr3(:,ny) = 0.0
   err = norm2_vec(barr3,comm_cart,root=0,garea=[1,1])
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err
   END IF
 !================================================================================
 !                        9.0  Epilogue
   CALL h5file
   CALL MPI_FINALIZE(ierr)
 CONTAINS
   SUBROUTINE disp(str, arr)
     CHARACTER(len=*), INTENT(in) :: str
     REAL(rkind), INTENT(in)      :: arr(:,:)
     INTEGER :: j
     WRITE(*,'(/a)') str
     DO j=1,SIZE(arr,2)
        WRITE(*,'(10f8.3)') arr(:,j)
     END DO
   END SUBROUTINE disp
 !
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_stencil.h5'
     INTEGER :: fid
     CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
     CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
     CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
     CALL putarrnd(fid, '/barr1', barr1,(/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/barr2', barr2,(/1,2/), garea=(/1,1/))
     CALL putarrnd(fid, '/barr3', barr3,(/1,2/), garea=(/1,1/))
     CALL putmat(fid, '/MAT', mat)
     CALL closef(fid)
   END SUBROUTINE h5file
 !
   FUNCTION outerprod(x, y) RESULT(r)
 !
 !  outer product
 !
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: r(SIZE(x),SIZE(y))
     INTEGER :: i, j
     DO j=1,SIZE(y)
        DO i=1,SIZE(x)
           r(i,j) = x(i)*y(j)
        END DO
     END DO
   END FUNCTION outerprod
 !
   FUNCTION rhs(x,y)
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: rhs(SIZE(x),SIZE(y))
     rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
   END FUNCTION rhs
 !
   FUNCTION exact(x,y)
     REAL(rkind), INTENT(in) :: x(:), y(:)
     REAL(rkind)             :: exact(SIZE(x),SIZE(y))
     exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
   END FUNCTION exact
 END PROGRAM main
diff --git a/multigrid/src/test_stencilg.f90 b/multigrid/src/test_stencilg.f90
index 0b920cc..0696dd2 100644
--- a/multigrid/src/test_stencilg.f90
+++ b/multigrid/src/test_stencilg.f90
@@ -1,203 +1,203 @@
 !>
 !> @file test_stencilg.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE mod
   USE iso_fortran_env, ONLY : rkind => real64
   IMPLICIT NONE
 !
   LOGICAL, PARAMETER     :: nldebug=.FALSE.
   REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
 CONTAINS
 END MODULE mod
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 PROGRAM main
   USE mpi
   USE pputils2, ONLY : dist1d
   USE gvector, ONLY  : gvector_2d, ASSIGNMENT(=), OPERATOR(-)
   USE parmg, ONLY    : exchange, norm_vec
   USE stencil, ONLY  : stencil_2d, init, laplacian, putmat, OPERATOR(*)
   USE mod
   IMPLICIT NONE
 !
   INTEGER, PARAMETER :: ndims=2
 !
   INTEGER                   :: me, neighs(4), npes, ierr
   INTEGER, DIMENSION(ndims) :: dims=[0,0]
   INTEGER, DIMENSION(ndims) :: coords, comm1d
   LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
   LOGICAL                   :: reorder =.FALSE.
   INTEGER                   :: comm_cart
 !
   INTEGER                   :: nx=4, ny=4    ! Number of intervals
   INTEGER, DIMENSION(ndims) :: e, s, g, npt_glob, npt_loc
 !
   REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
   TYPE(gvector_2d)          :: arr, fexact
   TYPE(gvector_2d)          :: barr1, barr2, barr3
   REAL(rkind)               :: dx, dy
   REAL(rkind)               :: err
   INTEGER, DIMENSION(5,2)   :: id       ! 5-point stencil 
   INTEGER                   :: npoints
   TYPE(stencil_2d)          :: mat
   INTEGER                   :: i, j
 !
   NAMELIST /in/ nx, ny
 !================================================================================
 !                        1.0  Prologue
 !
 !  2D process grid
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_dims_create(npes, ndims, dims, ierr)
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
        &               ierr)
 !
   CALL mpi_comm_rank(comm_cart, me, ierr)
   CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
   CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
   CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
 !
   CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
   CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
 !
 !  Read problem inputs
   IF(me.EQ.0) THEN
      READ(*,in)
      WRITE(*,in)
   END IF
   CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
   CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
 !================================================================================
 !                        2.0  2d Grid construction
 !
 !  Partition 2D grid
   npt_glob(1) = nx+1
   npt_glob(2) = ny+1
   DO i=1,ndims
      CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
      e(i) = s(i) + npt_loc(i) - 1
   END DO
   WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
        &  (coords(i),s(i),e(i),i=1,ndims)
 !
 !   Global mesh
   dx = 1.0d0/REAL(nx)
   dy = 1.0d0/REAL(ny)
   ALLOCATE(xgrid(0:nx))
   ALLOCATE(ygrid(0:ny))
   xgrid = [ (i*dx, i=0,nx) ]
   ygrid = [ (i*dy, i=0,ny) ]
 !================================================================================
 !                        3.0  FD Laplacian
 !
   id=RESHAPE([ 0, -1, 0, 1, 0,  & 
                0,  0,-1, 0, 1], &
               [5,2])
   npoints = 5
   CALL init(s, e, id, .FALSE., mat, comm_cart)
 !
   CALL laplacian(dx, dy, mat)
 !================================================================================
 !                       4.0 Check matrice-vector product
 !
 !  Local arrays with ghost cells
   g = [1,1]
   arr   = gvector_2d(s, e, g)
   barr1 = gvector_2d(s, e, g)
   barr2 = gvector_2d(s, e, g)
   barr3 = gvector_2d(s, e, g)
   fexact = gvector_2d(s, e, g)
 !
 !   Constant vector => Laplacian = 0
   arr = 1.0d0
   CALL exchange(comm_cart, arr)
   barr1 = mat*arr
   IF(s(1).EQ.0)  barr1%val(0,:)  = 0.0  ! discard boundary values
   IF(e(1).EQ.nx) barr1%val(nx,:) = 0.0
   IF(s(2).EQ.0)  barr1%val(:,0)  = 0.0
   IF(e(2).EQ.ny) barr1%val(:,ny) = 0.0
   err = norm_vec(barr1, comm_cart, root=0)
   IF(me.EQ.0) THEN
      WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err
   END IF
 !
 !  Bilinear vector => Laplacian = 0
   DO j=s(2),e(2)
      DO i=s(1),e(1)
         arr%val(i,j) = xgrid(i)*ygrid(j)
      END DO
   END DO
   CALL exchange(comm_cart, arr)
   barr2 = mat*arr
   IF(s(1).EQ.0)  barr2%val(0,:)  = 0.0  ! discard boundary values
   IF(e(1).EQ.nx) barr2%val(nx,:) = 0.0
   IF(s(2).EQ.0)  barr2%val(:,0)  = 0.0
   IF(e(2).EQ.ny) barr2%val(:,ny) = 0.0
   err = norm_vec(barr2, comm_cart, root=0)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err
   END IF
 !
 !  Biquadratic vector => Laplacian = fexact
   DO j=s(2),e(2)
      DO i=s(1),e(1)
         arr%val(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0
         fexact%val(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0
      END DO
   END DO
   CALL exchange(comm_cart, arr)
   CALL exchange(comm_cart, fexact)
   barr3 = mat*arr - fexact
   IF(s(1).EQ.0)  barr3%val(0,:)  = 0.0  ! discard boundary values
   IF(e(1).EQ.nx) barr3%val(nx,:) = 0.0
   IF(s(2).EQ.0)  barr3%val(:,0)  = 0.0
   IF(e(2).EQ.ny) barr3%val(:,ny) = 0.0
   err = norm_vec(barr3, comm_cart, root=0)
   IF(me.EQ.0) THEN
      WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err
   END IF
 !================================================================================
 !                        9.0  Epilogue
   CALL h5file
   CALL MPI_FINALIZE(ierr)
 !
 CONTAINS
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_stencilg.h5'
     INTEGER :: fid
     CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
     CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
     CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
     CALL putarrnd(fid, '/arr',   arr%val,(/1,2/), garea=g)
     CALL putarrnd(fid, '/barr1', barr1%val,(/1,2/), garea=g)
     CALL putarrnd(fid, '/barr2', barr2%val,(/1,2/), garea=g)
     CALL putarrnd(fid, '/barr3', barr3%val,(/1,2/), garea=g)
     CALL putmat(fid, '/MAT', mat)
     CALL closef(fid)
   END SUBROUTINE h5file
 !
 END PROGRAM main
diff --git a/multigrid/src/test_transf2d.f90 b/multigrid/src/test_transf2d.f90
index a126b7e..82098d7 100644
--- a/multigrid/src/test_transf2d.f90
+++ b/multigrid/src/test_transf2d.f90
@@ -1,301 +1,301 @@
 !>
 !> @file test_transf2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test 2d multigrid 
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
   DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
   INTEGER :: levels=1
   DOUBLE PRECISION :: omega=2.0d0/3.0d0
 !
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy
 !
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:)
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:)
   DOUBLE PRECISION, POINTER :: fcoarse_1d(:)
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:)
   DOUBLE PRECISION, POINTER :: vfine_1d(:)
   DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), &
        &                           disc_err_prolong(:)
 !
   INTEGER :: ierr, me
   INTEGER :: l, nterms
   INTEGER :: its
 !
   TYPE(grid2d), ALLOCATABLE :: grids(:)
 !
   NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels
 !--------------------------------------------------------------------------------
 !                    1.   Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Inputs
 !
   n = (/8, 8/)
   nidbas=(/3,3/)
   ngauss=(/2,2/)
   alpha = (/0,0/)
   kx=4
   ky=3
   sigma=10.0d0
   levels=2
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
      CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
 !
 !   Adjust number of levels
 !
   levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
 !
 !   Create grids
 !
   dx = 1.0d0/REAL(n(1),8)
   dy = 1.0d0/REAL(n(2),8)
   ALLOCATE(x(0:n(1)), y(0:n(2)))
   DO ix=0,n(1)
      x(ix) = ix*dx
   END DO
   DO iy=0,n(2)
      y(iy) = iy*dy
   END DO
 !
   ALLOCATE(grids(levels))
   CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
   WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
   WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
 !
 !   Build FE matrices and set BC
 !
   nterms = 3
   DO l=1,levels
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
      CALL ibcmat(grids(l), grids(l)%mata)
      CALL to_mat(grids(l)%mata)
   END DO
 !
 !   Set BC on grid transfer matrices
 !
   CALL ibc_transf(grids,1,3)
   CALL ibc_transf(grids,2,3)
 !--------------------------------------------------------------------------------
 !                   1.  Direct solutions
 !
   WRITE(*,'(/a)') 'Direct solutions for all levels ...'
   ALLOCATE(errdisc(levels))
   ALLOCATE(resid(levels))
   WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid'
   DO l=1,levels
      CALL disrhs(grids(l)%spl, grids(l)%f, rhs)
      CALL ibcrhs(grids(l), grids(l)%f)
      grids(l)%v = grids(l)%f
      CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.)
      errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol)
      resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d)
      WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l)
   END DO
 !
 !  Grid values of direct solutions at the finest levels
   ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
   ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
        &       [0,0], grids(1)%v)
   sol_anal_grid = sol(grids(1)%x, grids(1)%y)
 !--------------------------------------------------------------------------------
 !                   2.  Test restrict and prolong
 !
   WRITE(*,'(/a)') 'Testing restrict and prolong...'
   WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err'
   ALLOCATE(err_restrict(2:levels))
   ALLOCATE(err_prolong(2:levels))
   ALLOCATE(disc_err_prolong(2:levels))
   ALLOCATE(vfine_grid(0:n(1),0:n(2)))
   DO l=2,levels
      ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2)))
      fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse
      ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2)))
      vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine
 !
      fcoarse = restrict(grids(l)%matp,  grids(l-1)%f)
      err_restrict(l) =  MAXVAL(ABS(fcoarse_1d-grids(l)%f1d))
 !
      CALL direct_solve(grids(l), fcoarse_1d)
      vfine = prolong(grids(l)%matp, fcoarse)
      disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol)
      err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d))
 !
      IF(l.EQ.2) THEN   ! Grid val on finest grid
         CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, &
              &       [0,0], vfine)
      END IF
 !
      WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), &
           &                      disc_err_prolong(l)
      DEALLOCATE(fcoarse)
      DEALLOCATE(vfine)
   END DO
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
   CALL mpi_finalize(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION rhs(x, y)
 !
 ! Return problem RHS
 !
     DOUBLE PRECISION, INTENT(in) :: x, y
     DOUBLE PRECISION :: rhs
     rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
   END FUNCTION rhs
 !+++
   FUNCTION sol(x, y)
 !
 ! Return exact problem solution
 !
     DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
     DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
     DOUBLE PRECISION :: c
     INTEGER :: j
     DO j=1,SIZE(y)
        c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
        sol(:,j) = c * SIN(PI*kx*x(:))
     END DO
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(x, y, idt, idw, c)
 !
 ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
 !
     DOUBLE PRECISION, INTENT(in)  :: x, y
     INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = 1.0d0
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.0d0
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
 !
     c(3) = sigma
     idt(3,1) = 0
     idt(3,2) = 0
     idw(3,1) = 0
     idw(3,2) = 0
 
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_transf2d.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', n(1))
     CALL attach(fid, '/', 'NY', n(2))
     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
     CALL attach(fid, '/', 'KX', kx)
     CALL attach(fid, '/', 'KY', ky)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'ALPHA1', alpha(1))
     CALL attach(fid, '/', 'ALPHA2', alpha(2))
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/direct', sol_direct_grid)
     CALL putarr(fid, '/solutions/anal', sol_anal_grid)
     CALL putarr(fid, '/solutions/vfine', vfine_grid)
 !
 !   Some errors
 !
     CALL creatg(fid, '/errors')
     CALL putarr(fid, '/errors/errdisc', errdisc)
     CALL putarr(fid, '/errors/resid', resid)
     CALL putarr(fid, '/errors/restrict', err_restrict)
     CALL putarr(fid, '/errors/prolong', err_prolong)
     CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong)
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM
diff --git a/multigrid/src/test_transf2d_cyl.f90 b/multigrid/src/test_transf2d_cyl.f90
index 66427e6..1a4ae79 100644
--- a/multigrid/src/test_transf2d_cyl.f90
+++ b/multigrid/src/test_transf2d_cyl.f90
@@ -1,321 +1,321 @@
 !>
 !> @file test_transf2d_cyl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Test 2d multigrid 
 !   Cylindrical case
 !
   USE multigrid
   USE csr
   IMPLICIT NONE
   INCLUDE 'mpif.h'
 !
   DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
   INTEGER, DIMENSION(2) :: n, nidbas, ngauss
   INTEGER :: modem=22, modep=10
   INTEGER :: levels=1
   CHARACTER(len=4) :: prb='poly'
   LOGICAL :: nluniq=.TRUE.
 !
   DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
   DOUBLE PRECISION :: dx, dy
   INTEGER :: ix, iy
 !
   DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:)
 !
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:)
   DOUBLE PRECISION, POINTER :: fcoarse_1d(:)
   DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:)
   DOUBLE PRECISION, POINTER :: vfine_1d(:)
   DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), &
        &                           disc_err_prolong(:)
 !
   INTEGER :: ierr, me
   INTEGER :: l, nterms
   INTEGER :: its
   INTEGER :: n2
 !
   TYPE(grid2d), ALLOCATABLE :: grids(:)
 !
   NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, nluniq
 !--------------------------------------------------------------------------------
 !                    1.   Prologue
 !
   CALL mpi_init(ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Inputs
 !
   n = (/8, 8/)
   nidbas=(/3,3/)
   ngauss=(/2,2/)
   modem = 22
   modep = 10
   prb='poly'
   levels=2
   nluniq = .TRUE.
 !
   IF(me.EQ.0) THEN
      READ(*,newrun)
      WRITE(*,newrun)
   END IF
      CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
      CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
 !
 !   Adjust number of levels
 !
   levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
 !
 !   Create grids
 !
   dx = 1.0d0/REAL(n(1),8)
   dy = 2.0d0*pi/REAL(n(2),8)
   ALLOCATE(x(0:n(1)), y(0:n(2)))
   DO ix=0,n(1)
      x(ix) = ix*dx
   END DO
   DO iy=0,n(2)
      y(iy) = iy*dy
   END DO
 !
   ALLOCATE(grids(levels))
   CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
        &           debug_in=.FALSE.)
   WRITE(*,'(5a6,a12)') 'l', 'nx', 'ny', 'rx', 'ry', 'shape of v'
   WRITE(*,'(7i6)') (l, grids(l)%n, grids(l)%rank, SHAPE(grids(l)%v), l=1,levels)
 !
 !   Build FE matrices and set BC
 !
   nterms = 2
   DO l=1,levels
      CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
      CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
      CALL to_mat(grids(l)%mata)
   END DO
 !
 !   Set BC on grid transfer matrices
 !
   CALL ibc_transf(grids, 1, 2)   ! Only right boundary on r (1st dim.)
 !--------------------------------------------------------------------------------
 !                   1.  Direct solutions
 !
   WRITE(*,'(/a)') 'Direct solutions for all levels ...'
   WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid'
 !
   ALLOCATE(errdisc(levels))
   ALLOCATE(resid(levels))
 !
   DO l=1,levels
      CALL disrhs(grids(l)%spl, grids(l)%f, rhs)
      CALL ibcrhs(grids(l), grids(l)%f, nluniq_in=nluniq)
 !
      grids(l)%v = grids(l)%f
      CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.)
 !
      resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d)
      CALL back_transf(grids(l), grids(l)%v, nluniq_in=nluniq)
      errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol)
      WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l)
   END DO
 !
 !  Grid values of direct solutions at the finest levels
   ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
   ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
   CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
        &       [0,0], grids(1)%v)
   sol_anal_grid = sol(grids(1)%x, grids(1)%y)
 !--------------------------------------------------------------------------------
 !                   2.  Test restrict and prolong
 !
   WRITE(*,'(/a)') 'Testing restrict and prolong...'
   WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err'
   ALLOCATE(err_restrict(2:levels))
   ALLOCATE(err_prolong(2:levels))
   ALLOCATE(disc_err_prolong(2:levels))
   ALLOCATE(vfine_grid(0:n(1),0:n(2)))
   DO l=2,levels
      ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2)))
      fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse
      ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2)))
      vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine
 !
      fcoarse(:,:) = restrict_cyl(grids(l),  grids(l-1)%f, nluniq)
 !
      err_restrict(l) =  MAXVAL(ABS(fcoarse_1d-grids(l)%f1d))
 !
      CALL direct_solve(grids(l), fcoarse_1d)
 !
      vfine(:,:) = prolong_cyl(grids(l), fcoarse, nluniq)
 !
      CALL back_transf(grids(l-1), vfine, nluniq_in=nluniq)
      disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol)
      err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d))
 !
      IF(l.EQ.2) THEN   ! Grid val on finest grid
         CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, &
              &       [0,0], vfine)
      END IF
 !
      WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), &
           &                      disc_err_prolong(l)
      DEALLOCATE(fcoarse)
      DEALLOCATE(vfine)
   END DO
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   IF(me.EQ.0) CALL h5file
 !
   CALL mpi_finalize(ierr)
 !--------------------------------------------------------------------------------
 CONTAINS
 !+++
   FUNCTION rhs(r, theta)
 !
 ! Return problem RHS
 !
     USE math_util, ONLY : root_bessj
     DOUBLE PRECISION, INTENT(in) :: r, theta
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: nump
 !
     SELECT CASE(TRIM(prb))
     CASE('poly')
        rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
     CASE('bess')
        nump = root_bessj(modem, modep)
        rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
     END SELECT
   END FUNCTION rhs
 !+++
   FUNCTION sol(r, theta)
 !
 ! Return exact problem solution
 !
     USE math_util, ONLY : root_bessj
     DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
     DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
     DOUBLE PRECISION :: nump
     INTEGER :: j
 !
     SELECT CASE(TRIM(prb))
     CASE('poly')
        DO j=1,SIZE(theta)
           sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
        END DO
     CASE('bess')
        nump = root_bessj(modem, modep)
        DO j=1,SIZE(theta)
           sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
        END DO
     END SELECT
   END FUNCTION sol
 !+++
   SUBROUTINE coefeq(r, theta, idt, idw, c)
 !
 ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
 !
     DOUBLE PRECISION, INTENT(in)  :: r, theta
     INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     c(1) = r
     idt(1,1) = 1
     idt(1,2) = 0
     idw(1,1) = 1
     idw(1,2) = 0
 !
     c(2) = 1.0d0/r
     idt(2,1) = 0
     idt(2,2) = 1
     idw(2,1) = 0
     idw(2,2) = 1
   END SUBROUTINE coefeq
 !+++
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='test_transf2d_cyl.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', n(1))
     CALL attach(fid, '/', 'NY', n(2))
     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
     CALL attach(fid, '/', 'MODEM', modem)
     CALL attach(fid, '/', 'MODEP', modep)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
        IF(l.GT.1) THEN
           CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
           CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
        END IF
        CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
        CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
        CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
        CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
        CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
     END DO
 !
 !  Solutions at finest grid
 !
     CALL creatg(fid, '/solutions')
     CALL putarr(fid, '/solutions/xg', grids(1)%x)
     CALL putarr(fid, '/solutions/yg', grids(1)%y)
     CALL putarr(fid, '/solutions/direct', sol_direct_grid)
     CALL putarr(fid, '/solutions/anal', sol_anal_grid)
     CALL putarr(fid, '/solutions/vfine', vfine_grid)
 !
 !   Some errors
 !
     CALL creatg(fid, '/errors')
     CALL putarr(fid, '/errors/errdisc', errdisc)
     CALL putarr(fid, '/errors/resid', resid)
     CALL putarr(fid, '/errors/restrict', err_restrict)
     CALL putarr(fid, '/errors/prolong', err_prolong)
     CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong)
 !
     CALL closef(fid)
   END SUBROUTINE h5file
 !+++
 END PROGRAM
diff --git a/multigrid/src/transfer1d.f90 b/multigrid/src/transfer1d.f90
index c245f09..319883d 100644
--- a/multigrid/src/transfer1d.f90
+++ b/multigrid/src/transfer1d.f90
@@ -1,126 +1,126 @@
 !>
 !> @file transfer1d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !--------------------------------------------------------------------------------
 !--------------------------------------------------------------------------------
 !--------------------------------------------------------------------------------
 PROGRAM main
   USE multigrid
   IMPLICIT NONE
 !
   INTEGER          :: nx=8, nidbas=1, ngauss=4, alpha=0, modem=0
   DOUBLE PRECISION :: sigma=1.0d0
   LOGICAL          :: nlper=.FALSE.
   INTEGER          :: j
 !
   TYPE(grid1d) :: gridx(2)
   TYPE(gemat)  :: prolong_mat, restrict_mat, coarse_mat
   DOUBLE PRECISION, ALLOCATABLE :: arow(:), temp(:,:)
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, sigma, alpha, modem, nlper
 !--------------------------------------------------------------------------------
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !    Set up fine and coarse grids
 !
   CALL create_grid(nx, nidbas, ngauss, alpha, gridx, period=nlper)
   CALL printmat('** Prolongation matrix **', gridx(2)%transf)
 !
 !   Restriction matrix = transpose of prolongation matrix
 !
   CALL mcopy(gridx(2)%transf, prolong_mat)
   CALL init(prolong_mat%mrows, 1, restrict_mat, mrows=prolong_mat%ncols)
   restrict_mat%val = TRANSPOSE(prolong_mat%val)
 !
 !   Compute femat on fine and coarse grids
 !
   IF(nlper) THEN
      CALL femat(gridx(1)%spl, gridx(1)%matap, coefeq)
      CALL printmat('** FE matrix on fine mesh **', gridx(1)%matap)
      CALL femat(gridx(2)%spl, gridx(2)%matap, coefeq)
      CALL printmat('** FE matrix on coarse mesh **', gridx(2)%matap)
   ELSE
      CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq)
      CALL printmat('** FE matrix on fine mesh **', gridx(1)%mata)
      CALL femat(gridx(2)%spl, gridx(2)%mata, coefeq)
      CALL printmat('** FE matrix on coarse mesh **', gridx(2)%mata)
   END IF
 !
 !   Compute coarse FE matrix using transfer matrix
 !
   IF(nlper) THEN
      CALL init(gridx(2)%matap%rank, 1, coarse_mat)
      ALLOCATE(temp(gridx(1)%matap%rank,gridx(2)%matap%rank))
      DO j=1,gridx(2)%matap%rank
         temp(:,j) = vmx(gridx(1)%matap,prolong_mat%val(:,j))
      END DO
      coarse_mat%val = vmx(restrict_mat,temp)
      DEALLOCATE(temp)
   ELSE
      CALL init(gridx(2)%mata%rank, 1, coarse_mat)
      coarse_mat%val = vmx(restrict_mat,vmx(gridx(1)%mata,prolong_mat%val))
   END IF
   CALL printmat('** Coarse FE matrix using transfer operators **', coarse_mat)
 !
 !   Compute the diff of Ac - R*Af*P
 !
   IF(nlper) THEN
      coarse_mat%val = coarse_mat%val - gridx(2)%matap%val
   ELSE
      ALLOCATE(arow(gridx(2)%mata%rank))
      DO j=1,gridx(2)%mata%rank
         CALL getcol(gridx(2)%mata, j, arow)
         coarse_mat%val(:,j) = coarse_mat%val(:,j)-arow(:)
      END DO
      DEALLOCATE(arow)
   END IF
   WRITE(*,'(a,1pe12.3)') 'Diff =', MAXVAL(ABS(coarse_mat%val))
 CONTAINS
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     SELECT CASE (alpha)
     CASE(0)           ! Cartesian geometry
        c(1) = 1.0d0
        idt(1) = 1
        idw(1) = 1
        c(2) = sigma
        idt(2) = 0
        idw(2) = 0
     CASE(1)
        c(1) = x
        idt(1) = 1
        idw(1) = 1
        c(2) = modem**2/x
        idt(2) = 0
        idw(2) = 0
     CASE default
        WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!'
     END SELECT
   END SUBROUTINE coefeq
 !--------------------------------------------------------------------------------
 END PROGRAM main
diff --git a/multigrid/src/transfer1d_col.f90 b/multigrid/src/transfer1d_col.f90
index d0724d5..75b9d8c 100644
--- a/multigrid/src/transfer1d_col.f90
+++ b/multigrid/src/transfer1d_col.f90
@@ -1,53 +1,53 @@
 !>
 !> @file transfer1d_col.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !    Obtain grid transfer by collocation
 !
   USE multigrid
   IMPLICIT NONE
 !
   INTEGER :: nx=8, nidbas=1
   LOGICAL :: nlper=.TRUE.
 !
   TYPE(grid1d) :: gridx(2)
   TYPE(gemat)  :: pmat
 !
   NAMELIST /newrun/ nx, nidbas, nlper
 !--------------------------------------------------------------------------------
   READ(*,newrun)
   WRITE(*,newrun)
 !
   CALL create_grid(nx, nidbas, 1, 0, gridx, period=nlper)
   CALL printmat('** Prolongation matrix (using mass matrix) **', gridx(2)%transf)
 !
   CALL calc_pmat(gridx(1), gridx(2), pmat, .TRUE.)
   CALL printmat('** Prolongation matrix (by collocation) **', pmat)
 !
   WRITE(*,'(/a,1pe12.3)') 'Max diff =', MAXVAL(ABS(pmat%val-gridx(2)%transf%val))
 !
 END PROGRAM main
 
diff --git a/multigrid/src/two_grid.f90 b/multigrid/src/two_grid.f90
index db513a8..4342a4d 100644
--- a/multigrid/src/two_grid.f90
+++ b/multigrid/src/two_grid.f90
@@ -1,189 +1,189 @@
 !>
 !> @file two_grid.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Check some properties of grid transfer
 !
   USE multigrid
   USE math_util, ONLY : root_bessj
   IMPLICIT NONE
 !
   INTEGER          :: nx=8, nidbas=1, ngauss=2, alpha=0
   INTEGER          :: modem=22, modep=10
   INTEGER          :: levels=2
   INTEGER          :: l, nrank
   DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
   DOUBLE PRECISION, ALLOCATABLE :: v_prolong(:)
 !
   TYPE(grid1d)  :: gridx(2)
 !
   NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha
 !--------------------------------------------------------------------------------
 !                   1.  Prologue
 !   Inputs
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Create grids
 !
   CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
   WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
 !
 !   Create FE matrice and set BC u(0)=u(1)=0
 !
   DO l=1,levels
      CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq)
 !
 !     Left Dirichlet BC (only for Cartesian geometry)
      IF(alpha .EQ. 0) THEN
         CALL ibcmat(1, gridx(l)%mata)
      END IF
 !
 !     Right Dirichlet BC
      CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata)
 !
 !     BC on grid transfer operator
      IF(l.GT.1) THEN
         WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0
         IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0
         gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0
      END IF
   END DO
 !
 !   Construct RHS and set BC only on the finest grid
 !
   nrank = gridx(1)%rank
   CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
 !
 !     Left Dirichlet BC (only for Cartesian geometry)
   IF(alpha .EQ. 0) THEN
      gridx(1)%f(1) = 0.0d0
   END IF
 !
 !     Right Dirichlet BC
   gridx(1)%f(nrank) = 0.0d0
 !
 !   RHS on coarse grid by restriction
 !
   gridx(2)%f = restrict(gridx(2)%transf,gridx(1)%f)
 !--------------------------------------------------------------------------------
 !                   2.  Direct solutions
 !
   DO l=1,levels
      CALL direct_solve(gridx(l), gridx(l)%v)
      WRITE(*,'(a,i3/(10(1pe12.3)))') 'Sol at level', l, gridx(l)%v
   END DO
 !
 !   Prolongation of coarse solution  
 !
   ALLOCATE(v_prolong(SIZE(gridx(1)%v)))
 !
   v_prolong = prolong(gridx(2)%transf, gridx(2)%v)
   WRITE(*,'(a,i3/(10(1pe12.3)))') 'Prolong. sol.', l, v_prolong
   WRITE(*,'(a,1pe12.3)') 'Error ||V_prolong-V_fine||', normf(gridx(1)%matm, v_prolong-gridx(1)%v)
 !--------------------------------------------------------------------------------
 !                   9.  Epilogue
 !
 !   Creata HDF5 file
 !
   CALL h5file
 !--------------------------------------------------------------------------------
 CONTAINS
   SUBROUTINE h5file
     USE futils
     CHARACTER(len=128) :: file='two_grid.h5'
     INTEGER :: fid
     INTEGER :: l
     CHARACTER(len=64) :: dsname
     CALL creatf(file, fid, real_prec='d')
     CALL attach(fid, '/', 'NX', nx)
     CALL attach(fid, '/', 'NIDBAS', nidbas)
     CALL attach(fid, '/', 'SIGMA', sigma)
     CALL attach(fid, '/', 'KMODE', kmode)
     CALL attach(fid, '/', 'ALPHA', alpha)
     CALL attach(fid, '/', 'LEVELS', levels)
     CALL creatg(fid, '/mglevels')
     DO l=1,levels
        WRITE(dsname,'("/mglevels/level.",i2.2)') l
        CALL creatg(fid, TRIM(dsname))
        CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata)
        IF(l.GT.1) THEN
           CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
           CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
           CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
        END IF
        CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
        CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
     END DO
     CALL putarr(fid, '/v_prolong', v_prolong)
   END SUBROUTINE h5file
   FUNCTION rhs(x)
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE PRECISION :: rhs
     DOUBLE PRECISION :: nump
     SELECT CASE (alpha)
     CASE(0)                  ! Cartesian geometry
        rhs = SIN(pi*kmode*x)
     CASE(1)                  ! Cylindrical
        nump = root_bessj(modem, modep)
        rhs = x * nump**2 * bessel_jn(modem, nump*x)
     END SELECT
   END FUNCTION rhs
   FUNCTION sol(x)
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION :: sol(SIZE(x))
     DOUBLE PRECISION :: nump
     SELECT CASE (alpha)
     CASE(0)                  ! Cartesian geometry
        sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
     CASE(1)                  ! Cylindrical
        nump = root_bessj(modem, modep)
        sol(:) = bessel_jn(modem, nump*x(:))
     END SELECT
   END FUNCTION sol
   SUBROUTINE coefeq(x, idt, idw, c)
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: idt(:), idw(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     SELECT CASE (alpha)
     CASE(0)           ! Cartesian geometry
        c(1) = 1.0d0
        idt(1) = 1
        idw(1) = 1
        c(2) = sigma
        idt(2) = 0
        idw(2) = 0
     CASE(1)           ! Cylindrical
        c(1) = x
        idt(1) = 1
        idw(1) = 1
        c(2) = REAL(modem,8)**2/x
        idt(2) = 0
        idw(2) = 0
     END SELECT
   END SUBROUTINE coefeq
 END PROGRAM main
diff --git a/multigrid/wk/CMakeLists.txt b/multigrid/wk/CMakeLists.txt
index 98a7017..40db313 100644
--- a/multigrid/wk/CMakeLists.txt
+++ b/multigrid/wk/CMakeLists.txt
@@ -1,53 +1,53 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 project(multigrid_wk)
 
 set(MG_TESTS
   transfer1d
   test_relax
   test_mg
   test_mgp
   test_csr
   two_grid
   test_mg2d
   test_relax2d
   test_transf2d
   transfer1d_col
   test_relax2d_cyl
   test_transf2d_cyl
   test_mg2d_cyl
   poisson_fd
 )
 
 set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh")
 set(BIN_DIR "${multigrid_tests_BINARY_DIR}")
 set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
 
 foreach(prog ${MG_TESTS})
   add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
     ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR}
     )
 endforeach()
diff --git a/multigrid/wk/run.sh b/multigrid/wk/run.sh
index 152144a..40ef66a 100644
--- a/multigrid/wk/run.sh
+++ b/multigrid/wk/run.sh
@@ -1,59 +1,59 @@
 #
 # @file run.sh
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 #!/bin/bash
 
 EXE=/home/ttran/bsplines/multigrid/src/poisson_mg
 TMP=/misc/multigrid
 [ -e $TMP ] || mkdir -p $TMP
 
 cat > in0 <<EOF
 &parameters
  prb='dddd'
  mat_type='cds'
  nx=xxx, ny=yyy
  nz=1,
  kx=4, ky=4,
  Lx=100.d00, Ly=800.d00,
  nldebug=f
  nldirect=f,
  icrosst=16,
  beta=-1E-2
  levels=12
  nnu=1
  nu1= 3,4,5
  nu2= 3,4,5
  mu=1, nu0=1,
  relax= 'gs', omega=0.9
  nits=15, atol=0., rtol=1.e-8
 /
 EOF
 for x in 32 64 128 256 512; do
     y=$(echo "4 * $x" | bc)
     echo -n "Nx = $x, Ny = $y: "
     sed "s/xxx/$x/g" in0 | sed "s/yyy/$y/g" | ${EXE} | grep '     3     3 '
     mv poisson_mg.h5 $TMP/dddd_${x}x${y}.h5
 done
 
diff --git a/multigrid/wk/runtest.sh b/multigrid/wk/runtest.sh
index 810d2bb..d4cc61d 100644
--- a/multigrid/wk/runtest.sh
+++ b/multigrid/wk/runtest.sh
@@ -1,37 +1,37 @@
 #
 # @file runtest.sh
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 #!/bin/sh
 
 progname=$1
 input_dir=$2
 
 prog=$(basename ${progname})
 input_file=${input_dir}/${prog}.in
 
 ${progname} < $input_file
 
 exit $?
 
diff --git a/pppack/CMakeLists.txt b/pppack/CMakeLists.txt
index 948a8ea..1f42ad7 100644
--- a/pppack/CMakeLists.txt
+++ b/pppack/CMakeLists.txt
@@ -1,37 +1,37 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 set(SRCS
   bvalue.f90
   interv.f90
 )
 
 add_library(pppack STATIC ${SRCS})
 
 install(TARGETS pppack
   EXPORT ${BSPLINES_EXPORT_TARGETS}
   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 )
diff --git a/pppack/Makefile b/pppack/Makefile
index 0a5cbcf..2c83638 100644
--- a/pppack/Makefile
+++ b/pppack/Makefile
@@ -1,72 +1,72 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Sébastien Jolliet <sebastien.jolliet@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 SRCS =	banfac.f90 banslv.f90 bchfac.f90 bchslv.f90 bsplpp.f90 bsplvb.f90 \
 	bsplvd.f90 bspp2d.f90 bvalue.f90 chol1d.f90 colloc.f90 colpnt.f90 \
 	cspint.f90 cubset.f90 cubslo.f90 cubspl.f90 cwidth.f90 difequ.f90 \
 	dtblok.f90 eqblok.f90 evnnot.f90 factrb.f90 fcblok.f90 interv.f90 \
 	knots.f90 l2appr.f90 l2err.f90 l2knts.f90 newnot.f90 ppvalu.f90 \
 	putit.f90 rvec_print.f90 sbblok.f90 setupq.f90 shiftb.f90 slvblk.f90 \
 	smooth.f90 spli2d.f90 spline_hermite_set.f90 spline_hermite_val.f90 \
 	splint.f90 splopt.f90 subbak.f90 subfor.f90 tautsp.f90 titanium.f90
 
 OBJS =	banfac.o banslv.o bchfac.o bchslv.o bsplpp.o bsplvb.o bsplvd.o \
 	bspp2d.o bvalue.o chol1d.o colloc.o colpnt.o cspint.o cubset.o \
 	cubslo.o cubspl.o cwidth.o difequ.o dtblok.o eqblok.o evnnot.o \
 	factrb.o fcblok.o interv.o knots.o l2appr.o l2err.o l2knts.o newnot.o \
 	ppvalu.o putit.o rvec_print.o sbblok.o setupq.o shiftb.o slvblk.o \
 	smooth.o spli2d.o spline_hermite_set.o spline_hermite_val.o splint.o \
 	splopt.o subbak.o subfor.o tautsp.o titanium.o
 
 OBJS = interv.o bvalue.o
 
 LIBS =
 
 CC = cc
 CFLAGS = -g
 FC = ifort
 FFLAGS = $(OPT)
 F90 = $(FC)
 F90FLAGS = $(FFLAGS)
 LDFLAGS =
 
 lib: libpppack.a
 
 libpppack.a: $(OBJS)
 	xiar r $@ $?
 	ranlib $@
 
 clean:
 	rm -f *.o *.mod *~ core
 
 distclean: clean
 	rm -f libpppack.a  a.out
 
 .SUFFIXES:
 .SUFFIXES: .o .c .f90
 
 .f90.o:
 	$(F90) $(F90FLAGS) -c $<
diff --git a/pppack/banfac.f90 b/pppack/banfac.f90
index 50ecb50..3aa668a 100644
--- a/pppack/banfac.f90
+++ b/pppack/banfac.f90
@@ -1,234 +1,234 @@
 !>
 !> @file banfac.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine banfac ( w, nroww, nrow, nbandl, nbandu, iflag )
 
 !*************************************************************************
 !
 !! BANFAC factors a banded matrix without pivoting.
 !
 !  Discussion:
 !
 !    BANFAC returns in W the LU-factorization, without pivoting, of
 !    the banded matrix A of order NROW with (NBANDL+1+NBANDU) bands
 !    or diagonals in the work array W.
 !
 !    Gauss elimination without pivoting is used.  The routine is
 !    intended for use with matrices A which do not require row
 !    interchanges during factorization, especially for the totally
 !    positive matrices which occur in spline calculations.
 !
 !    The matrix storage mode used is the same one used by LINPACK
 !    and LAPACK, and results in efficient innermost loops.
 !
 !    Explicitly, A has
 !
 !      NBANDL bands below the diagonal
 !      1     main diagonal
 !      NBANDU bands above the diagonal
 !
 !    and thus, with MIDDLE=NBANDU+1,
 !    A(I+J,J) is in W(I+MIDDLE,J) for I=-NBANDU,...,NBANDL, J=1,...,NROW.
 !
 !    For example, the interesting entries of a banded matrix
 !    matrix of order 9, with NBANDL=1, NBANDU=2:
 !
 !      11 12 13  0  0  0  0  0  0
 !      21 22 23 24  0  0  0  0  0
 !       0 32 33 34 35  0  0  0  0
 !       0  0 43 44 45 46  0  0  0
 !       0  0  0 54 55 56 57  0  0
 !       0  0  0  0 65 66 67 68  0
 !       0  0  0  0  0 76 77 78 79
 !       0  0  0  0  0  0 87 88 89
 !       0  0  0  0  0  0  0 98 99
 !
 !    would appear in the first 1+1+2=4 rows of W as follows:
 !
 !       0  0 13 24 35 46 57 68 79
 !       0 12 23 34 45 56 67 78 89
 !      11 22 33 44 55 66 77 88 99
 !      21 32 43 54 65 76 87 98  0
 !
 !    All other entries of W not identified in this way with an
 !    entry of A are never referenced.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input/output, real ( kind = 8 ) W(NROWW,NROW).
 !    On input, W contains the "interesting" part of a banded
 !    matrix A, with the diagonals or bands of A stored in the
 !    rows of W, while columns of A correspond to columns of W.
 !    On output, W contains the LU-factorization of A into a unit
 !    lower triangular matrix L and an upper triangular matrix U
 !    (both banded) and stored in customary fashion over the
 !    corresponding entries of A.
 !
 !    This makes it possible to solve any particular linear system A*X=B
 !    for X by the call
 !
 !      call banslv ( w, nroww, nrow, nbandl, nbandu, b )
 !
 !    with the solution X contained in B on return.
 !
 !    If IFLAG=2, then one of NROW-1, NBANDL, NBANDU failed to be nonnegative,
 !    or else one of the potential pivots was found to be zero
 !    indicating that A does not have an LU-factorization.  This
 !    implies that A is singular in case it is totally positive.
 !
 !    Input, integer NROWW, the row dimension of the work array W.
 !    NROWW must be at least NBANDL+1 + NBANDU.
 !
 !    Input, integer NROW, the number of rows in A.
 !
 !    Input, integer NBANDL, the number of bands of A below the main diagonal.
 !
 !    Input, integer NBANDU, the number of bands of A above the main diagonal.
 !
 !    Output, integer IFLAG, error flag.
 !    1, success.
 !    2, failure, the matrix was not factored.
 !
   implicit none
 
   integer nrow
   integer nroww
 
   real ( kind = 8 ) factor
   integer i
   integer iflag
   integer j
   integer k
   integer middle
   integer nbandl
   integer nbandu
   real ( kind = 8 ) pivot
   real ( kind = 8 ) w(nroww,nrow)
 
   iflag = 1
 
   if ( nrow < 1 ) then
     iflag = 2
     return
   end if
 !
 !  W(MIDDLE,*) contains the main diagonal of A.
 !
   middle = nbandu + 1
 
   if ( nrow == 1 ) then
     if ( w(middle,nrow) == 0.0D+00 ) then
       iflag = 2
     end if
     return
   end if
 !
 !  A is upper triangular.  Check that the diagonal is nonzero.
 !
   if ( nbandl <= 0 ) then
 
     do i = 1, nrow-1
       if ( w(middle,i) == 0.0D+00 ) then
         iflag = 2
         return
       end if
     end do
 
     if ( w(middle,nrow) == 0.0D+00 ) then
       iflag = 2
     end if
 
     return
 !
 !  A is lower triangular.  Check that the diagonal is nonzero and
 !  divide each column by its diagonal.
 !
   else if ( nbandu <= 0 ) then
 
     do i = 1, nrow-1
 
       pivot = w(middle,i)
 
       if ( pivot == 0.0D+00 ) then
         iflag = 2
         return
       end if
 
       do j = 1, min ( nbandl, nrow-i )
         w(middle+j,i) = w(middle+j,i) / pivot
       end do
 
     end do
 
     return
 
   end if
 !
 !  A is not just a triangular matrix.
 !  Construct the LU factorization.
 !
   do i = 1, nrow-1
 !
 !  W(MIDDLE,I) is the pivot for the I-th step.
 !
     if ( w(middle,i) == 0.0D+00 ) then
       iflag = 2
       write ( *, '(a)' ) ' '
       write ( *, '(a)' ) 'BANFAC - Fatal error!'
       write ( *, '(a,i6)' ) '  Zero pivot encountered in column ', i
       stop
     end if
 !
 !  Divide each entry in column I below the diagonal by PIVOT.
 !
     do j = 1, min ( nbandl, nrow-i )
       w(middle+j,i) = w(middle+j,i) / w(middle,i)
     end do
 !
 !  Subtract A(I,I+K)*(I-th column) from (I+K)-th column (below row I).
 !
     do k = 1, min ( nbandu, nrow-i )
       factor = w(middle-k,i+k)
       do j = 1, min ( nbandl, nrow-i )
         w(middle-k+j,i+k) = w(middle-k+j,i+k) - w(middle+j,i) * factor
       end do
     end do
 
   end do
 !
 !  Check the last diagonal entry.
 !
   if ( w(middle,nrow) == 0.0D+00 ) then
     iflag = 2
   end if
 
   return
 end
diff --git a/pppack/banslv.f90 b/pppack/banslv.f90
index a8159de..b0e1891 100644
--- a/pppack/banslv.f90
+++ b/pppack/banslv.f90
@@ -1,112 +1,112 @@
 !>
 !> @file banslv.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine banslv ( w, nroww, nrow, nbandl, nbandu, b )
 
 !*************************************************************************
 !
 !! BANSLV solves a banded linear system X * X = B factored by BANFAC.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) W(NROWW,NROW).  W contains the banded matrix,
 !    after it has been factored by BANFAC.
 !
 !    Input, integer NROWW, the row dimension of the work array W.
 !    NROWW must be at least NBANDL+1 + NBANDU.
 !
 !    Input, integer NROW, the number of rows in A.
 !
 !    Input, integer NBANDL, the number of bands of A below the
 !    main diagonal.
 !
 !    Input, integer NBANDU, the number of bands of A above the
 !    main diagonal.
 !
 !    Input/output, real ( kind = 8 ) B(NROW).
 !    On input, B contains the right hand side of the system to be solved.
 !    On output, B contains the solution, X.
 !
   implicit none
 
   integer nrow
   integer nroww
 
   real ( kind = 8 ) b(nrow)
   integer i
   integer j
   integer jmax
   integer middle
   integer nbandl
   integer nbandu
   real ( kind = 8 ) w(nroww,nrow)
 
   middle = nbandu + 1
 
   if ( nrow == 1 ) then
     b(1) = b(1) / w(middle,1)
     return
   end if
 !
 !  Forward pass
 !
 !  For I = 1, 2, ..., NROW-1, subtract RHS(I)*(I-th column of L)
 !  from the right side, below the I-th row.
 !
   if ( 0 < nbandl ) then
     do i = 1, nrow-1
       jmax = min ( nbandl, nrow-i )
       do j = 1, jmax
         b(i+j) = b(i+j) - b(i) * w(middle+j,i)
       end do
     end do
   end if
 !
 !  Backward pass
 !
 !  For I=NROW, NROW-1,...,1, divide RHS(I) by
 !  the I-th diagonal entry of U, then subtract
 !  RHS(I)*(I-th column of U) from right side, above the I-th row.
 !
   do i = nrow, 2, -1
 
     b(i) = b(i) / w(middle,i)
 
     do j = 1, min ( nbandu, i-1 )
       b(i-j) = b(i-j) - b(i) * w(middle-j,i)
     end do
 
   end do
 
   b(1) = b(1) / w(middle,1)
 
   return
 end
diff --git a/pppack/bchfac.f90 b/pppack/bchfac.f90
index c582ad0..67231c8 100644
--- a/pppack/bchfac.f90
+++ b/pppack/bchfac.f90
@@ -1,168 +1,168 @@
 !>
 !> @file bchfac.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine bchfac ( w, nbands, nrow, diag )
 
 !*************************************************************************
 !
 !! BCHFAC constructs a Cholesky factorization of a matrix.
 !
 !  Discussion:
 !
 !    The factorization has the form
 !
 !      C = L * D * L'
 !
 !    with L unit lower triangular and D diagonal, for a given matrix C of
 !    order NROW, where C is symmetric positive semidefinite and banded,
 !    having NBANDS diagonals at and below the main diagonal.
 !
 !    Gauss elimination is used, adapted to the symmetry and bandedness of C.
 !
 !    Near-zero pivots are handled in a special way.  The diagonal
 !    element C(N,N)=W(1,N) is saved initially in DIAG(N), all N.
 !
 !    At the N-th elimination step, the current pivot element, W(1,N),
 !    is compared with its original value, DIAG(N).  If, as the result
 !    of prior elimination steps, this element has been reduced by about
 !    a word length, (i.e., if W(1,N)+DIAG(N) <= DIAG(N)), then the pivot
 !    is declared to be zero, and the entire N-th row is declared to
 !    be linearly dependent on the preceding rows.  This has the effect
 !    of producing X(N) = 0 when solving C*X = B for X, regardless of B.
 !
 !    Justification for this is as follows.  In contemplated applications
 !    of this program, the given equations are the normal equations for
 !    some least-squares approximation problem, DIAG(N) = C(N,N) gives
 !    the norm-square of the N-th basis function, and, at this point,
 !    W(1,N) contains the norm-square of the error in the least-squares
 !    approximation to the N-th basis function by linear combinations
 !    of the first N-1.
 !
 !    Having W(1,N)+DIAG(N) <= DIAG(N) signifies that the N-th function
 !    is linearly dependent to machine accuracy on the first N-1
 !    functions, therefore can safely be left out from the basis of
 !    approximating functions.
 !
 !    The solution of a linear system C*X=B is effected by the
 !    succession of the following two calls:
 !
 !      CALL BCHFAC(W,NBANDS,NROW,DIAG)
 !
 !      CALL BCHSLV(W,NBANDS,NROW,B,X)
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input/output, real ( kind = 8 ) W(NBANDS,NROW).
 !
 !    On input, W contains the NBANDS diagonals in its rows,
 !    with the main diagonal in row 1.  Precisely, W(I,J)
 !    contains C(I+J-1,J), I=1,...,NBANDS, J=1,...,NROW.
 !
 !    For example, the interesting entries of a seven diagonal
 !    symmetric matrix C of order 9 would be stored in W as
 !
 !      11 22 33 44 55 66 77 88 99
 !      21 32 43 54 65 76 87 98  *
 !      31 42 53 64 75 86 97  *  *
 !      41 52 63 74 85 96  *  *  *
 !
 !    Entries of the array not associated with an
 !    entry of C are never referenced.
 !
 !    On output, W contains the Cholesky factorization
 !    C = L*D*L-transp, with W(1,I) containing 1/D(I,I) and W(I,J)
 !    containing L(I-1+J,J), I=2,...,NBANDS.
 !
 !    Input, integer NBANDS, indicates the bandwidth of the
 !    matrix C, i.e., C(I,J) = 0 for NBANDS < ABS(I-J).
 !
 !    Input, integer NROW, is the order of the matrix C.
 !
 !    Work array, real ( kind = 8 ) DIAG(NROW).
 !
   implicit none
 
   integer nbands
   integer nrow
 
   real ( kind = 8 ) diag(nrow)
   integer i
   integer imax
   integer j
   integer jmax
   integer n
   real ( kind = 8 ) ratio
   real ( kind = 8 ) w(nbands,nrow)
 
   if ( nrow <= 1 ) then
     if ( 0.0D+00 < w(1,1) ) then
       w(1,1) = 1.0D+00 / w(1,1)
     end if
     return
   end if
 !
 !  Store the diagonal.
 !
   diag(1:nrow) = w(1,1:nrow)
 !
 !  Factorization.
 !
   do n = 1, nrow
 
     if ( w(1,n) + diag(n) <= diag(n) ) then
       w(1:nbands,n) = 0.0D+00
     else
 
       w(1,n) = 1.0D+00 / w(1,n)
 
       imax = min ( nbands-1, nrow-n )
 
       jmax = imax
 
       do i = 1, imax
 
         ratio = w(i+1,n) * w(1,n)
 
         do j = 1, jmax
           w(j,n+i) = w(j,n+i) - w(j+i,n) * ratio
         end do
 
         jmax = jmax-1
         w(i+1,n) = ratio
 
       end do
 
     end if
 
   end do
 
   return
 end
diff --git a/pppack/bchslv.f90 b/pppack/bchslv.f90
index 503e3e1..93e9745 100644
--- a/pppack/bchslv.f90
+++ b/pppack/bchslv.f90
@@ -1,114 +1,114 @@
 !>
 !> @file bchslv.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine bchslv ( w, nbands, nrow, b )
 
 !*************************************************************************
 !
 !! BCHSLV solves a banded symmetric positive definite system.
 !
 !  Discussion:
 !
 !    The system is of the form:
 !
 !      C * X = B
 !
 !    and the Cholesky factorization of C has been constructed
 !    by BCHFAC.
 !
 !    With the factorization
 !
 !      C = L * D * L'
 !
 !    available, where L is unit lower triangular and D is diagonal,
 !    the triangular system
 !
 !      L * Y = B
 !
 !    is solved for Y (forward substitution), Y is stored in B, the
 !    vector D**(-1)*Y is computed and stored in B, then the
 !    triangular system L'*X = D**(-1)*Y is solved for X
 !    (backsubstitution).
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) W(NBANDS,NROW), the Cholesky factorization for C,
 !    as computed by BCHFAC.
 !
 !    Input, integer NBANDS, the bandwidth of C.
 !
 !    Input, integer NROW, the order of the matrix C.
 !
 !    Input/output, real ( kind = 8 ) B(NROW).
 !    On input, the right hand side.
 !    On output, the solution.
 !
   implicit none
 
   integer nbands
   integer nrow
 
   real ( kind = 8 ) b(nrow)
   integer j
   integer n
   real ( kind = 8 ) w(nbands,nrow)
 
   if ( nrow <= 1 ) then
     b(1) = b(1) * w(1,1)
     return
   end if
 !
 !  Forward substitution.
 !  Solve L*Y=B.
 !
   do n = 1, nrow
 
     do j = 1, min(nbands-1,nrow-n)
       b(j+n) = b(j+n) - w(j+1,n) * b(n)
     end do
 
   end do
 !
 !  Backsubstitution.
 !  Solve L'*X=D**(-1)*Y.
 !
   do n = nrow, 1, -1
 
     b(n) = b(n)*w(1,n)
 
     do j = 1, min(nbands-1,nrow-n)
       b(n) = b(n) - w(j+1,n) * b(j+n)
     end do
 
   end do
 
   return
 end
diff --git a/pppack/bsplpp.f90 b/pppack/bsplpp.f90
index 41b76bd..819f296 100644
--- a/pppack/bsplpp.f90
+++ b/pppack/bsplpp.f90
@@ -1,165 +1,165 @@
 !>
 !> @file bsplpp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine bsplpp ( t, bcoef, n, k, scrtch, break, coef, l )
 
 !*************************************************************************
 !
 !! BSPLPP converts from B-spline to piecewise polynomial form.
 !
 !  Discussion:
 !
 !    The B-spline representation of a spline is ( T, BCOEF, N, K ),
 !    while the piecewise polynomial representation is
 !    ( BREAK, COEF, L, K ).
 !
 !    For each breakpoint interval, the K relevant B-spline coefficients
 !    of the spline are found and then differenced repeatedly to get the
 !    B-spline coefficients of all the derivatives of the spline on that
 !    interval.
 !
 !    The spline and its first K-1 derivatives are then evaluated at the
 !    left end point of that interval, using BSPLVB repeatedly to obtain
 !    the values of all B-splines of the appropriate order at that point.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(N+K), the knot sequence.
 !
 !    Input, real ( kind = 8 ) BCOEF(N), the B spline coefficient sequence.
 !
 !    Input, integer N, the number of B spline coefficients.
 !
 !    Input, integer K, the order of the spline.
 !
 !    Work array, real ( kind = 8 ) SCRTCH(K,K).
 !
 !    Output, real ( kind = 8 ) BREAK(L+1), the piecewise polynomial breakpoint
 !    sequence.  BREAK contains the distinct points in the
 !    sequence T(K),...,T(N+1)
 !
 !    Output, real ( kind = 8 ) COEF(K,N), with COEF(I,J) = (I-1)st derivative
 !    of the spline at BREAK(J) from the right.
 !
 !    Output, integer L, the number of polynomial pieces which
 !    make up the spline in the interval (T(K),T(N+1)).
 !
   implicit none
 
   integer k
   integer l
   integer n
 
   real ( kind = 8 ) bcoef(n)
   real ( kind = 8 ) biatx(k)
   real ( kind = 8 ) break(*)
   real ( kind = 8 ) coef(k,n)
   real ( kind = 8 ) diff
   integer i
   integer j
   integer jp1
   integer left
   integer lsofar
   real ( kind = 8 ) scrtch(k,k)
   real ( kind = 8 ) sum1
   real ( kind = 8 ) t(n+k)
 
   lsofar = 0
   break(1) = t(k)
 
   do left = k, n
 !
 !  Find the next nontrivial knot interval.
 !
     if ( t(left+1) == t(left) ) then
       cycle
     end if
 
     lsofar = lsofar + 1
     break(lsofar+1) = t(left+1)
 
     if ( k <= 1 ) then
       coef(1,lsofar) = bcoef(left)
       cycle
     end if
 !
 !  Store the K B-spline coefficients relevant to current knot
 !  interval in SCRTCH(*,1).
 !
     do i = 1, k
       scrtch(i,1) = bcoef(left-k+i)
     end do
 !
 !  For j=1,...,k-1, compute the  k-j  b-spline coefficients relevant to
 !  current knot interval for the j-th derivative by differencing
 !  those for the (j-1)st derivative, and store in scrtch(.,j+1) .
 !
     do jp1 = 2, k
       j = jp1-1
       do i = 1, k-j
         diff = t(left+i)-t(left+i-(k-j))
         if ( 0.0D+00 < diff ) then
           scrtch(i,jp1)=((scrtch(i+1,j)-scrtch(i,j)) / diff ) &
             * real ( k - j, kind = 8 )
         end if
       end do
     end do
 !
 !  For J=0, ..., K-1, find the values at T(left)  of the  j+1
 !  B-splines of order J+1 whose support contains the current
 !  knot interval from those of order J (in  biatx ), then comb-
 !  ine with the B-spline coefficients (in scrtch(.,k-j) ) found earlier
 !  to compute the (k-j-1)st derivative at  t(left)  of the given
 !  spline.
 !
     call bsplvb ( t, 1, 1, t(left), left, biatx )
 
     coef(k,lsofar) = scrtch(1,k)
 
     do jp1 = 2, k
 
       call bsplvb ( t, jp1, 2, t(left), left, biatx )
 
       sum1 = 0.0D+00
       do i = 1, jp1
         sum1 = sum1 + biatx(i) * scrtch(i,k+1-jp1)
       end do
 
       coef(k+1-jp1,lsofar) = sum1
 
     end do
 
   end do
 
   l = lsofar
 
   return
 end
diff --git a/pppack/bsplvb.f90 b/pppack/bsplvb.f90
index 89d9578..54e9460 100644
--- a/pppack/bsplvb.f90
+++ b/pppack/bsplvb.f90
@@ -1,170 +1,170 @@
 !>
 !> @file bsplvb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine bsplvb ( t, jhigh, index, x, left, biatx )
 
 !*************************************************************************
 !
 !! BSPLVB evaluates B-splines at a point X with a given knot sequence.
 !
 !  Discusion:
 !
 !    BSPLVB evaluates all possibly nonzero B-splines at X of order
 !
 !      JOUT = MAX ( JHIGH, (J+1)*(INDEX-1) )
 !
 !    with knot sequence T.
 !
 !    The recurrence relation
 !
 !                     X - T(I)               T(I+J+1) - X
 !    B(I,J+1)(X) = ----------- * B(I,J)(X) + --------------- * B(I+1,J)(X)
 !                  T(I+J)-T(I)               T(I+J+1)-T(I+1)
 !
 !    is used to generate B(LEFT-J:LEFT,J+1)(X) from B(LEFT-J+1:LEFT,J)(X)
 !    storing the new values in BIATX over the old.
 !
 !    The facts that
 !
 !      B(I,1)(X) = 1  if  T(I) <= X < T(I+1)
 !
 !    and that
 !
 !      B(I,J)(X) = 0  unless  T(I) <= X < T(I+J)
 !
 !    are used.
 !
 !    The particular organization of the calculations follows
 !    algorithm 8 in chapter X of the text.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(LEFT+JOUT), the knot sequence.  T is assumed to
 !    be nondecreasing, and also, T(LEFT) must be strictly less than
 !    T(LEFT+1).
 !
 !    Input, integer JHIGH, INDEX, determine the order
 !    JOUT = MAX(JHIGH,(J+1)*(INDEX-1))
 !    of the B-splines whose values at X are to be returned.
 !    INDEX is used to avoid recalculations when several
 !    columns of the triangular array of B-spline values are
 !    needed, for example, in BVALUE or in BSPLVD.
 !
 !    If INDEX = 1, the calculation starts from scratch and the entire
 !    triangular array of B-spline values of orders
 !    1, 2, ...,JHIGH is generated order by order, i.e.,
 !    column by column.
 !
 !    If INDEX = 2, only the B-spline values of order J+1, J+2, ..., JOUT
 !    are generated, the assumption being that BIATX, J,
 !    DELTAL, DELTAR are, on entry, as they were on exit
 !    at the previous call.  In particular, if JHIGH = 0,
 !    then JOUT = J+1, i.e., just the next column of B-spline
 !    values is generated.
 !
 !    WARNING: the restriction  JOUT <= JMAX (= 20) is
 !    imposed arbitrarily by the dimension statement for DELTAL
 !    and DELTAR, but is nowhere checked for.
 !
 !    Input, real ( kind = 8 ) X, the point at which the B-splines
 !    are to be evaluated.
 !
 !    Input, integer LEFT, an integer chosen so that
 !    T(LEFT) <= X <= T(LEFT+1).
 !
 !    Output, real ( kind = 8 ) BIATX(JOUT), with BIATX(I) containing the
 !    value at X of the polynomial of order JOUT which agrees
 !    with the B-spline B(LEFT-JOUT+I,JOUT,T) on the interval
 !    (T(LEFT),T(LEFT+1)).
 !
   implicit none
 
   integer, parameter :: jmax = 20
 
   integer jhigh
 
   real ( kind = 8 ) biatx(jhigh)
 !!$  real ( kind = 8 ), save, dimension ( jmax ) :: deltal
 !!$  real ( kind = 8 ), save, dimension ( jmax ) :: deltar
   real ( kind = 8 ), dimension ( jmax ) :: deltal
   real ( kind = 8 ), dimension ( jmax ) :: deltar
   integer i
   integer index
 !!$  integer, save :: j = 1
   integer :: j
   integer left
   real ( kind = 8 ) saved
   real ( kind = 8 ) t(left+jhigh)
   real ( kind = 8 ) term
   real ( kind = 8 ) x
 
 ! Forces starting always from scratch!
 !!$  if ( index == 1 ) then
     j = 1
     biatx(1) = 1.0D+00
     if ( jhigh <= j ) then
       return
     end if
 !!$  end if
 
   if ( t(left+1) <= t(left) ) then
     write ( *, '(a)' ) ' '
     write ( *, '(a)' ) 'BSPLVB - Fatal error!'
     write ( *, '(a)' ) '  It is required that T(LEFT) < T(LEFT+1).'
     write ( *, '(a,i6)' ) '  But LEFT = ', left
     write ( *, '(a,g14.6)' ) '  T(LEFT) =   ', t(left)
     write ( *, '(a,g14.6)' ) '  T(LEFT+1) = ', t(left+1)
     stop
   end if
 
   do
 
     deltar(j) = t(left+j) - x
     deltal(j) = x - t(left+1-j)
 
     saved = 0.0D+00
     do i = 1, j
       term = biatx(i) / ( deltar(i) + deltal(j+1-i) )
       biatx(i) = saved + deltar(i) * term
       saved = deltal(j+1-i) * term
     end do
 
     biatx(j+1) = saved
     j = j + 1
 
     if ( jhigh <= j ) then
       exit
     end if
 
   end do
 
   return
 end
diff --git a/pppack/bsplvd.f90 b/pppack/bsplvd.f90
index 82d203c..e4b831f 100644
--- a/pppack/bsplvd.f90
+++ b/pppack/bsplvd.f90
@@ -1,189 +1,189 @@
 !>
 !> @file bsplvd.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine bsplvd ( t, k, x, left, a, dbiatx, nderiv )
 
 !*************************************************************************
 !
 !! BSPLVD calculates the nonvanishing B-splines and derivatives at X.
 !
 !  Discussion:
 !
 !    Values at X of all the relevant B-splines of order K, K-1,..., K+1-NDERIV
 !    are generated via BSPLVB and stored temporarily in DBIATX.
 !
 !    Then, the B-spline coefficients of the required derivatives
 !    of the B-splines of interest are generated by differencing,
 !    each from the preceding one of lower order, and combined with
 !    the values of B-splines of corresponding order in DBIATX
 !    to produce the desired values.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(LEFT+K), the knot sequence.  It is assumed that
 !    T(LEFT) < T(LEFT+1).  Also, the output is correct only if
 !    T(LEFT) <= X <= T(LEFT+1) .
 !
 !    Input, integer K, the order of the B-splines to be evaluated.
 !
 !    Input, real ( kind = 8 ) X, the point at which these values are sought.
 !
 !    Input, integer LEFT, indicates the left endpoint of the interval of
 !    interest.  The K B-splines whose support contains the interval
 !    (T(LEFT), T(LEFT+1)) are to be considered.
 !
 !    Workspace, real ( kind = 8 ) A(K,K).
 !
 !    Output, real ( kind = 8 ) DBIATX(K,NDERIV).  DBIATX(I,M) contains
 !    the value of the (M-1)st derivative of the (LEFT-K+I)-th B-spline
 !    of order K for knot sequence T, I=M,...,K, M=1,...,NDERIV.
 !
 !    Input, integer NDERIV, indicates that values of
 !    B-splines and their derivatives up to but not
 !    including the NDERIV-th are asked for.
 !
   implicit none
 
   integer k
   integer left
   integer nderiv
 
   real ( kind = 8 ) a(k,k)
   real ( kind = 8 ) dbiatx(k,nderiv)
   real ( kind = 8 ) factor
   real ( kind = 8 ) fkp1mm
   integer i
   integer ideriv
   integer il
   integer j
   integer jlow
   integer jp1mid
   integer ldummy
   integer m
   integer mhigh
   real ( kind = 8 ) sum1
   real ( kind = 8 ) t(left+k)
   real ( kind = 8 ) x
 
   mhigh = max ( min ( nderiv, k ), 1 )
 !
 !  MHIGH is usually equal to nderiv.
 !
   call bsplvb ( t, k+1-mhigh, 1, x, left, dbiatx )
 
   if ( mhigh == 1 ) then
     return
   end if
 !
 !  The first column of DBIATX always contains the B-spline values
 !  for the current order.  These are stored in column K+1-current
 !  order  before BSPLVB is called to put values for the next
 !  higher order on top of it.
 !
   ideriv = mhigh
   do m = 2, mhigh
     jp1mid = 1
     do j = ideriv, k
       dbiatx(j,ideriv) = dbiatx(jp1mid,1)
       jp1mid = jp1mid+1
     end do
     ideriv = ideriv-1
     call bsplvb(t,k+1-ideriv,2,x,left,dbiatx)
   end do
 !
 !  At this point,  b(left-k+i, k+1-j)(x) is in  dbiatx(i,j) for
 !  i=j,...,k and j=1,...,mhigh ('=' nderiv). in particular, the
 !  first column of  dbiatx  is already in final form. to obtain cor-
 !  ???  LOST A LINE ???
 !  rate their b-repr. by differencing, then evaluate at  x.
 !
   jlow = 1
   do i = 1, k
     do j = jlow,k
       a(j,i) = 0.0D+00
     end do
     jlow = i
     a(i,i) = 1.0D+00
   end do
 !
 !  At this point, a(.,j) contains the b-coefficients for the J-th of the
 !  k  b-splines of interest here.
 !
   do m = 2, mhigh
 
     fkp1mm = real ( k + 1 - m, kind = 8 )
     il = left
     i = k
 !
 !  For j=1,...,k, construct b-coefficients of  (m-1)st  derivative of
 !  b-splines from those for preceding derivative by differencing
 !  and store again in  a(.,j) .  The fact that  a(i,j)=0  for
 !  i < j  is used.
 !
     do ldummy = 1, k+1-m
 
       factor = fkp1mm/(t(il+k+1-m)-t(il))
 !
 !  The assumption that t(left) < t(left+1) makes denominator
 !  in  factor  nonzero.
 !
       do j = 1, i
         a(i,j) = (a(i,j)-a(i-1,j))*factor
       end do
 
       il = il-1
       i = i-1
 
     end do
 !
 !  For i=1,...,k, combine b-coefficients a(.,i) with B-spline values
 !  stored in dbiatx(.,m) to get value of  (m-1)st  derivative of
 !  i-th b-spline (of interest here) at  x , and store in
 !  dbiatx(i,m). storage of this value over the value of a b-spline
 !  of order m there is safe since the remaining b-spline derivat-
 !  ives of the same order do not use this value due to the fact
 !  that  a(j,i)=0  for j < i.
 !
     do i = 1, k
 
       sum1 = 0.0D+00
       jlow = max(i,m)
       do j = jlow,k
         sum1 = sum1 + a(j,i) * dbiatx(j,m)
       end do
 
       dbiatx(i,m) = sum1
     end do
 
   end do
 
   return
 end
diff --git a/pppack/bspp2d.f90 b/pppack/bspp2d.f90
index 59db260..e50c5da 100644
--- a/pppack/bspp2d.f90
+++ b/pppack/bspp2d.f90
@@ -1,203 +1,203 @@
 !>
 !> @file bspp2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine bspp2d ( t, bcoef, n, k, m, scrtch, break, coef, l )
 
 !*************************************************************************
 !
 !! BSPP2D converts from B-spline to piecewise polynomial representation.
 !
 !  Discussion:
 !
 !    The B-spline representation
 !
 !      T, BCOEF(.,J), N, K
 !
 !    is converted to its piecewise polynomial representation
 !
 !      BREAK, COEF(J,.,.), L, K, J=1, ..., M.
 !
 !    This is an extended version of BSPLPP for use with tensor products.
 !
 !    For each breakpoint interval, the K relevant B-spline
 !    coefficients of the spline are found and then differenced
 !    repeatedly to get the B-spline coefficients of all the
 !    derivatives of the spline on that interval.
 !
 !    The spline and its first K-1 derivatives are then evaluated
 !    at the left endpoint of that interval, using BSPLVB
 !    repeatedly to obtain the values of all B-splines of the
 !    appropriate order at that point.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(N+K), the knot sequence.
 !
 !    Input, real ( kind = 8 ) BCOEF(N,M).  For each J, B(*,J) is the
 !    B-spline coefficient sequence, of length N.
 !
 !    Input, integer N, the length of BCOEF.
 !
 !    Input, integer K, the order of the spline.
 !
 !    Input, integer M, the number of data sets.
 !
 !    Work array, real ( kind = 8 ) SCRTCH(K,K,M).
 !
 !    Output, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence
 !    containing the distinct points in the sequence T(K),...,T(N+1)
 !
 !    Output, real ( kind = 8 ) COEF(M,K,N), with COEF(MM,I,J) = the (I-1)st
 !    derivative of the MM-th spline at BREAK(J) from the right, MM=1, ..., M.
 !
 !    Output, integer L, the number of polynomial pieces which make up the
 !    spline in the interval (T(K), T(N+1)).
 !
   implicit none
 
   integer k
   integer m
   integer n
 
   real ( kind = 8 ) bcoef(n,m)
   real ( kind = 8 ) biatx(k)
   real ( kind = 8 ) break(*)
   real ( kind = 8 ) coef(m,k,*)
   real ( kind = 8 ) diff
   real ( kind = 8 ) fkmj
   integer i
   integer j
   integer jp1
   integer kmj
   integer l
   integer left
   integer lsofar
   integer mm
   real ( kind = 8 ) scrtch(k,k,m)
   real ( kind = 8 ) sum1
   real ( kind = 8 ) t(n+k)
 
   lsofar = 0
   break(1) = t(k)
 
   do left = k, n
 !
 !  Find the next nontrivial knot interval.
 !
     if ( t(left+1) == t(left) ) then
       cycle
     end if
 
     lsofar = lsofar+1
     break(lsofar+1) = t(left+1)
 
     if ( k <= 1 ) then
 
       do mm = 1, m
         coef(mm,1,lsofar) = bcoef(left,mm)
       end do
 
       cycle
 
     end if
 !
 !  Store the K b-spline coefficients relevant to current knot interval
 !  in  scrtch(.,1) .
 !
     do i = 1, k
       do mm = 1, m
         scrtch(i,1,mm) = bcoef(left-k+i,mm)
       end do
     end do
 !
 !  for j=1,...,k-1, compute the  k-j  b-spline coefficients relevant to
 !  current knot interval for the j-th derivative by differencing
 !  those for the (j-1)st derivative, and store in scrtch(.,j+1) .
 !
     do jp1 = 2, k
 
       j = jp1-1
       kmj = k-j
       fkmj = real ( k - j, kind = 8 )
 
       do i = 1, k-j
 
         diff = (t(left+i)-t(left+i-kmj))/fkmj
 
         if ( 0.0D+00 < diff ) then
 
           do mm = 1, m
             scrtch(i,jp1,mm)=(scrtch(i+1,j,mm)-scrtch(i,j,mm))/diff
           end do
 
         end if
 
       end do
 
     end do
 !
 !  For  j=0, ..., k-1, find the values at T(left)  of the  j+1
 !  b-splines of order  j+1  whose support contains the current
 !  knot interval from those of order  j  (in  biatx ), then comb-
 !  ine with the b-spline coefficients (in scrtch(.,k-j) ) found earlier
 !  to compute the (k-j-1)st derivative at  t(left)  of the given
 !  spline.
 !
     call bsplvb ( t, 1, 1, t(left), left, biatx )
 
     do mm = 1, m
       coef(mm,k,lsofar) = scrtch(1,k,mm)
     end do
 
     do jp1 = 2, k
 
       call bsplvb (t,jp1,2,t(left),left,biatx)
       kmj = k+1-jp1
 
       do mm = 1, m
 
         sum1 = 0.0D+00
         do i = 1, jp1
           sum1 = sum1 + biatx(i) * scrtch(i,kmj,mm)
         end do
 
         coef(mm,kmj,lsofar) = sum1
 
       end do
 
     end do
 
   end do
 
   l = lsofar
 
   return
 end
diff --git a/pppack/bvalue.f90 b/pppack/bvalue.f90
index ead864f..04af9b5 100644
--- a/pppack/bvalue.f90
+++ b/pppack/bvalue.f90
@@ -1,226 +1,226 @@
 !>
 !> @file bvalue.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 function bvalue ( t, bcoef, n, k, x, jderiv )
 
 !*************************************************************************
 !
 !! BVALUE evaluates a derivative of a spline from its B-spline representation.
 !
 !  Discussion:
 !
 !    The spline is taken to be continuous from the right.
 !
 !    The nontrivial knot interval (T(I),T(I+1)) containing X is
 !    located with the aid of INTERV.  The K B-spline coefficients
 !    of F relevant for this interval are then obtained from BCOEF,
 !    or are taken to be zero if not explicitly available, and are
 !    then differenced JDERIV times to obtain the B-spline
 !    coefficients of (D**JDERIV)F relevant for that interval.
 !
 !    Precisely, with J = JDERIV, we have from X.(12) of the text that:
 !
 !      (D**J)F = sum ( BCOEF(.,J)*B(.,K-J,T) )
 !
 !    where
 !                      / BCOEF(.),                    ,  J == 0
 !                     /
 !       BCOEF(.,J) = / BCOEF(.,J-1) - BCOEF(.-1,J-1)
 !                   / -----------------------------,  0 < J
 !                  /    (T(.+K-J) - T(.))/(K-J)
 !
 !    Then, we use repeatedly the fact that
 !
 !      sum ( A(.)*B(.,M,T)(X) ) = sum ( A(.,X)*B(.,M-1,T)(X) )
 !
 !    with
 !                   (X - T(.))*A(.) + (T(.+M-1) - X)*A(.-1)
 !      A(.,X) =   ---------------------------------------
 !                   (X - T(.))      + (T(.+M-1) - X)
 !
 !    to write (D**J)F(X) eventually as a linear combination of
 !    B-splines of order 1, and the coefficient for B(I,1,T)(X)
 !    must then be the desired number (D**J)F(X).
 !    See x.(17)-(19) of text.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(N+K), the knot sequence.  T is assumed
 !    to be nondecreasing.
 !
 !    Input, real ( kind = 8 ) BCOEF(N), B-spline coefficient sequence.
 !
 !    Input, integer N, the length of BCOEF.
 !
 !    Input, integer K, the order of the spline.
 !
 !    Input, real ( kind = 8 ) X, the point at which to evaluate.
 !
 !    Input, integer JDERIV, the order of the derivative to
 !    be evaluated.  JDERIV is assumed to be zero or positive.
 !
 !    Output, real ( kind = 8 ) BVALUE, the value of the (JDERIV)-th
 !    derivative of the spline at X.
 !
   implicit none
 
   integer k
   integer n
 
   real ( kind = 8 ) aj(k)
   real ( kind = 8 ) bcoef(n)
   real ( kind = 8 ) bvalue
   real ( kind = 8 ) dl(k)
   real ( kind = 8 ) dr(k)
   integer i
   integer ilo
   integer j
   integer jc
   integer jcmax
   integer jcmin
   integer jderiv
   integer jj
   integer mflag
   real ( kind = 8 ) t(n+k)
   real ( kind = 8 ) x
 
   bvalue = 0.0D+00
 
   if ( k <= jderiv ) then
     return
   end if
 !
 !  Find I so that 1 <= i < n+k and t(i) < t(i+1) and t(i) <= x < t(i+1).
 !
 !  If no such i can be found, X lies
 !  outside the support of  the spline F and  bvalue=0.
 !  (the asymmetry in this choice of  i  makes F rightcontinuous)
 !
   call interv ( t, n+k, x, i, mflag )
 
   if ( mflag /= 0 ) then
     return
   end if
 !
 !  If K=1 (and jderiv = 0), bvalue = bcoef(i).
 !
   if ( k <= 1 ) then
     bvalue = bcoef(i)
     return
   end if
 !
 !  Store the K b-spline coefficients relevant for the knot interval
 !  (T(i),T(i+1)) in aj(1),...,aj(k) and compute dl(j)=x-t(i+1-j),
 !  dr(j)=T(i+j)-x, j=1,...,k-1 . set any of the aj not obtainable
 !  from input to zero.  Set any T's not obtainable equal to T(1) or
 !  to T(n+k) appropriately.
 !
   jcmin = 1
 
   if ( k <= i ) then
 
     do j = 1, k-1
       dl(j) = x-t(i+1-j)
     end do
 
   else
 
     jcmin = 1-(i-k)
 
     do j = 1, i
       dl(j) = x-t(i+1-j)
     end do
 
     do j = i, k-1
       aj(k-j) = 0.0D+00
       dl(j) = dl(i)
     end do
 
   end if
 
   jcmax = k
 
   if ( i <= n ) then
     go to 90
   end if
 
   jcmax = k + n - i
   do j = 1, k+n-i
     dr(j) = t(i+j)-x
   end do
 
   do j = k+n-i, k-1
     aj(j+1) = 0.0D+00
     dr(j) = dr(k+n-i)
   end do
 
   go to 110
 
    90 continue
 
   do j = 1, k-1
     dr(j) = t(i+j)-x
   end do
 
   110 continue
 
   do jc = jcmin, jcmax
     aj(jc) = bcoef(i-k+jc)
   end do
 !
 !  Difference the coefficients JDERIV times.
 !
   do j = 1, jderiv
 
     ilo = k-j
     do jj = 1, k-j
       aj(jj) = ((aj(jj+1)-aj(jj))/(dl(ilo)+dr(jj))) * real ( k - j, kind = 8 )
       ilo = ilo-1
     end do
 
   end do
 !
 !  Compute value at X in (t(i),t(i+1)) of jderiv-th derivative,
 !  given its relevant b-spline coefficients in aj(1),...,aj(k-jderiv).
 !
   do j = jderiv+1, k-1
     ilo = k-j
     do jj = 1, k-j
       aj(jj) = ( aj(jj+1) * dl(ilo) + aj(jj) * dr(jj) ) &
         / ( dl(ilo) + dr(jj) )
       ilo = ilo-1
     end do
   end do
 
   bvalue = aj(1)
 
   return
 end
diff --git a/pppack/chol1d.f90 b/pppack/chol1d.f90
index 8a0e19b..4fdfbb4 100644
--- a/pppack/chol1d.f90
+++ b/pppack/chol1d.f90
@@ -1,146 +1,146 @@
 !>
 !> @file chol1d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine chol1d ( p, v, qty, npoint, u, qu )
 
 !*************************************************************************
 !
 !! CHOL1D sets up and solves linear systems needed by SMOOTH.
 !
 !  Discussion:
 !
 !    This routine constructs the upper three diagonals of
 !
 !      V(I,J), I=2 to NPOINT-1, J=1,3,
 !
 !    of the matrix
 !
 !      6*(1-P)*Q-transpose*(D**2)*Q + P*R.
 !
 !    It then computes its L*L' decomposition and stores it also
 !    in V, then applies forward and backsubstitution to the right side
 !
 !      Q'*Y
 !
 !    in QTY to obtain the solution in U.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) P, ?
 !
 !    ?put, real ( kind = 8 ) V(NPOINT,7), ?
 !
 !    ?put, real ( kind = 8 ) QTY(NPOINT), ?
 !
 !    Input, integer NPOINT, ?
 !
 !    Output, real ( kind = 8 ) U(NPOINT), the solution.
 !
 !    Output, real ( kind = 8 ) QU(NPOINT), the value of Q * U.
 !
   implicit none
 
   integer npoint
 
   integer i
   real ( kind = 8 ) p
   real ( kind = 8 ) qty(npoint)
   real ( kind = 8 ) qu(npoint)
   real ( kind = 8 ) u(npoint)
   real ( kind = 8 ) v(npoint,7)
   real ( kind = 8 ) prev
   real ( kind = 8 ) ratio
   real ( kind = 8 ) six1mp
   real ( kind = 8 ) twop
 !
 !  Construct 6*(1-p)*q'*(d**2)*q + p*r
 !
   six1mp = 6.0D+00 * ( 1.0D+00 - p )
   twop = 2.0D+00 * p
 
   do i = 2, npoint-1
     v(i,1) = six1mp * v(i,5)+twop*(v(i-1,4)+v(i,4))
     v(i,2) = six1mp * v(i,6)+p*v(i,4)
     v(i,3) = six1mp * v(i,7)
   end do
 
   if ( npoint < 4 ) then
     u(1) = 0.0D+00
     u(2) = qty(2) / v(2,1)
     u(3) = 0.0D+00
 !
 !  Factorization
 !
   else
 
     do i = 2, npoint-2
       ratio = v(i,2)/v(i,1)
       v(i+1,1) = v(i+1,1)-ratio*v(i,2)
       v(i+1,2) = v(i+1,2)-ratio*v(i,3)
       v(i,2) = ratio
       ratio = v(i,3)/v(i,1)
       v(i+2,1) = v(i+2,1)-ratio*v(i,3)
       v(i,3) = ratio
     end do
 !
 !  Forward substitution
 !
     u(1) = 0.0D+00
     v(1,3) = 0.0D+00
     u(2) = qty(2)
     do i = 2, npoint-2
       u(i+1) = qty(i+1)-v(i,2)*u(i)-v(i-1,3)*u(i-1)
     end do
 !
 !  Back substitution.
 !
     u(npoint) = 0.0D+00
     u(npoint-1) = u(npoint-1) / v(npoint-1,1)
 
     do i = npoint-2, 2, -1
       u(i) = u(i)/v(i,1)-u(i+1)*v(i,2)-u(i+2)*v(i,3)
     end do
 
   end if
 !
 !  Construct Q*U.
 !
   prev = 0.0D+00
   do i = 2, npoint
     qu(i) = (u(i)-u(i-1))/v(i-1,4)
     qu(i-1) = qu(i)-prev
     prev = qu(i)
   end do
 
   qu(npoint) = -qu(npoint)
 
   return
 end
diff --git a/pppack/colloc.f90 b/pppack/colloc.f90
index df60a0e..26b0031 100644
--- a/pppack/colloc.f90
+++ b/pppack/colloc.f90
@@ -1,275 +1,275 @@
 !>
 !> @file colloc.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine colloc ( aleft, aright, lbegin, iorder, ntimes, addbrk, &
   relerr )
 
 !*************************************************************************
 !
 !! COLLOC solves an ordinary differential equation by collocation.
 !
 !  Method:
 !
 !    The M-th order ordinary differential equation with M side
 !    conditions, to be specified in subroutine DIFEQU, is solved
 !    approximately by collocation.
 !
 !    The approximation F to the solution G is piecewise polynomial of order
 !    k+m with L pieces and M-1 continuous derivatives.   F is determined by
 !    the requirement that it satisfy the differential equation at K points
 !    per interval (to be specified in COLPNT ) and the M side conditions.
 !
 !    This usually nonlinear system of equations for f is solved by
 !    Newton's method. the resulting linear system for the b-coefficients of an
 !    iterate is constructed appropriately in eqblok and then solved
 !    in slvblk, a program designed to solve almost block
 !    diagonal linear systems efficiently.
 !
 !    There is an opportunity to attempt improvement of the breakpoint
 !    sequence (both in number and location) through use of NEWNOT.
 !
 !    Printed output consists of the pp-representation of the approximate
 !    solution, and of the error at selected points.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) ALEFT, ARIGHT, the endpoints of the interval.
 !
 !    Input, integer LBEGIN, the initial number of polynomial pieces
 !    in the approximation.  A uniform breakpoint sequence will be chosen.
 !
 !    Input, integer IORDER, the order of the polynomial pieces to be
 !    used in the approximation
 !
 !    Input, integer NTIMES, the number of passes to be made through NEWNOT.
 !
 !  addbrk   the number (possibly fractional) of breaks to be added per
 !           pass through newnot. e.g., if addbrk=.33334, then a break-
 !           point will be added at every third pass through newnot.
 !
 !  relerr   a tolerance.  Newton iteration is stopped if the difference
 !           between the b-coefficients of two successive iterates is no more
 !           than  relerr*(absolute largest b-coefficient).
 !
   implicit none
 
   integer, parameter :: npiece = 100
   integer, parameter :: ndim = 200
   integer, parameter :: ncoef = 2000
   integer, parameter :: lenblk = 2000
 
   real ( kind = 8 ) a(ndim)
   real ( kind = 8 ) addbrk
   real ( kind = 8 ) aleft
   real ( kind = 8 ) amax
   real ( kind = 8 ) aright
   real ( kind = 8 ) asave(ndim)
   real ( kind = 8 ) b(ndim)
   real ( kind = 8 ) bloks(lenblk)
   real ( kind = 8 ) break
   real ( kind = 8 ) coef
   real ( kind = 8 ) dx
   real ( kind = 8 ) err
   integer i
   integer iflag
   integer ii
   integer integs(3,npiece)
   integer iorder
   integer iside
   integer itemps(ndim)
   integer iter
   integer itermx
   integer j
   integer k
   integer kpm
   integer l
   integer lbegin
   integer lnew
   integer m
   integer n
   integer nbloks
   integer nt
   integer ntimes
   real ( kind = 8 ) relerr
   real ( kind = 8 ) rho
   real ( kind = 8 ) t(ndim)
   real ( kind = 8 ) templ(lenblk)
   real ( kind = 8 ) temps(ndim)
   real ( kind = 8 ) xside
 
   equivalence (bloks,templ)
 
   common /approx/ break(npiece),coef(ncoef),l,kpm
   common /side/ m,iside,xside(10)
   common /other/ itermx,k,rho(19)
 
   kpm = iorder
 
   if ( ncoef < lbegin * kpm ) then
     go to 120
   end if
 !
 !  Set the various parameters concerning the particular dif.equ.
 !  including a first approximation in case the de is to be solved by
 !  iteration ( 0 < itermx ).
 !
   call difequ ( 1, temps(1), temps )
 !
 !  Obtain the K collocation points for the standard interval.
 !
   k = kpm-m
   call colpnt(k,rho)
 !
 !  The following five statements could be replaced by a read in or-
 !  der to obtain a specific (nonuniform) spacing of the breakpnts.
 !
   dx = (aright-aleft) / real ( lbegin, kind = 8 )
 
   temps(1) = aleft
   do i = 2, lbegin
     temps(i) = temps(i-1)+dx
   end do
   temps(lbegin+1) = aright
 !
 !  Generate the required knots t(1),...,t(n+kpm).
 !
   call knots ( temps, lbegin, kpm, t, n )
   nt = 1
 !
 !  Generate the almost block diagonal coefficient matrix  bloks  and
 !  right side  b  from collocation equations and side conditions.
 !  then solve via  slvblk , obtaining the b-representation of the
 !  approximation in T, A, N, KPM.
 !
 20    continue
 
   call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b )
 
   call slvblk ( bloks, integs, nbloks, b, itemps, a, iflag )
 
   iter = 1
   if ( itermx <= 1 ) then
     go to 60
   end if
 !
 !  Save b-spline coefficients of current approx. in  asave , then get new
 !  approx. and compare with old. if coefficients are more than  relerr
 !  apart (relatively) or if number of iterations is less than  itermx ,
 !  continue iterating.
 !
    30 continue
 
   call bsplpp(t,a,n,kpm,templ,break,coef,l)
 
   do i = 1, n
     asave(i) = a(i)
   end do
 
   call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b )
 
   call slvblk(bloks,integs,nbloks,b,itemps,a,iflag)
 
   err = 0.0D+00
   amax = 0.0D+00
   do i = 1, n
     amax = max ( amax, abs ( a(i) ) )
     err = max ( err, abs ( a(i)-asave(i) ) )
   end do
 
   if ( err <= relerr*amax ) then
     go to 60
   end if
 
   iter = iter + 1
 
   if ( iter < itermx ) then
     go to 30
   end if
 !
 !  Iteration (if any) completed. print out approx. based on current
 !  breakpoint sequence, then try to improve the sequence.
 !
    60 continue
 
   write(*,70)kpm,l,n,(break(i),i=2,l)
    70 format (' approximation from a space of splines of order',i3, &
      ' on ',i3,' intervals,'/' of dimension',i4,'.  breakpoints -'/ &
      (5e20.10))
 
   if ( 0 < itermx ) then
     write(*,*)' '
     write(*,*)'Results on interation ',iter
   end if
 
   call bsplpp(t,a,n,kpm,templ,break,coef,l)
 
   write ( *, * ) ' '
   write ( *, * ) 'The piecewise polynomial representation of the approximation:'
   write ( *, * ) ' '
 
   do i = 1, l
     ii = ( i - 1 ) * kpm
     write(*,'(f9.3,e13.6,10e11.3)')break(i),(coef(ii+j),j=1,kpm)
   end do
 !
 !  The following call is provided here for possible further analysis
 !  of the approximation specific to the problem being solved.
 !  it is, of course, easily omitted.
 !
   call difequ ( 4, temps(1), temps )
 
   if ( ntimes < nt ) then
     return
   end if
 !
 !  From the pp-rep. of the current approx., obtain in NEWNOT a new
 !  (and possibly better) sequence of breakpoints, adding (on the
 !  average) ADDBRK breakpoints per pass through NEWNOT.
 !
   lnew = lbegin + int ( real ( nt, kind = 8 ) * addbrk )
 
   if ( ncoef < lnew * kpm ) then
     go to 120
   end if
 
   call newnot(break,coef,l,kpm,temps,lnew,templ)
 
   call knots ( temps, lnew, kpm, t, n )
   nt = nt+1
   go to 20
 
   120 continue
   write(*,*)' '
   write(*,*)'COLLOC - Fatal error!'
   write(*,*)'  The assigned dimension for COEF is ',ncoef
   write(*,*)'  but this is too small.'
   stop
 end
diff --git a/pppack/colpnt.f90 b/pppack/colpnt.f90
index 8bfb5ad..37ef621 100644
--- a/pppack/colpnt.f90
+++ b/pppack/colpnt.f90
@@ -1,117 +1,117 @@
 !>
 !> @file colpnt.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine colpnt ( k, rho )
 
 !*************************************************************************
 !
 !! COLPNT supplies collocation points.
 !
 !  Discussion:
 !
 !    The collocation points are for the standard interval (-1,1) as the
 !    zeros of the Legendre polynomial of degree K, provided K <= 8.
 !
 !    Otherwise, uniformly spaced points are given.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, integer K, the number of collocation points desired.
 !
 !    Output, real ( kind = 8 ) RHO(K), the collocation points.
 !
   implicit none
 
   integer k
 
   integer j
   real ( kind = 8 ) rho(k)
 
   if ( k == 1 ) then
     rho(1) = 0.0D+00
   else if ( k == 2 ) then
     rho(1) = -0.577350269189626D+00
     rho(2) =  0.577350269189626D+00
   else if ( k == 3 ) then
     rho(1) = -0.774596669241483D+00
     rho(2) =  0.0
     rho(3) =  0.774596669241483D+00
   else if ( k == 4 ) then
     rho(1) = -0.861136311594053D+00
     rho(2) = -0.339981043584856D+00
     rho(3) =  0.339981043584856D+00
     rho(4) =  0.861136311594053D+00
   else if ( k == 5 ) then
     rho(1) = -0.906179845938664D+00
     rho(2) = -0.538469310105683D+00
     rho(3) =  0.0D+00
     rho(4) =  0.538469310105683D+00
     rho(5) =  0.906179845938664D+00
   else if ( k == 6 ) then
     rho(1) = -0.932469514203152D+00
     rho(2) = -0.661209386466265D+00
     rho(3) = -0.238619186083197D+00
     rho(4) =  0.238619186083197D+00
     rho(5) =  0.661209386466265D+00
     rho(6) =  0.932469514203152D+00
   else if ( k == 7 ) then
     rho(5) = 0.405845151377397D+00
     rho(3) = -rho(5)
     rho(6) = 0.741531185599394D+00
     rho(2) = -rho(6)
     rho(7) = 0.949107912342759D+00
     rho(1) = -rho(7)
     rho(4) = 0.0
   else if ( k == 8 ) then
     rho(5) = 0.183434642495650D+00
     rho(4) = -rho(5)
     rho(6) = 0.525532409916329D+00
     rho(3) = -rho(6)
     rho(7) = 0.796666477413627D+00
     rho(2) = -rho(7)
     rho(8) = 0.960289856497536D+00
     rho(1) = -rho(8)
   else
 
     write ( *, * )' '
     write ( *, * )'ColPnt - Warning!'
     write ( *, * )'  Equispaced collocation points will be used,'
     write ( *, * )'  because K =',k,' which is greater than 8.'
 
     do j = 1, k
       rho(j) = -1.0D+00 + 2.0D+00 * real ( j - 1, kind = 8 ) &
                                   / real ( k - 1, kind = 8 )
     end do
 
   end if
 
   return
 end
diff --git a/pppack/cspint.f90 b/pppack/cspint.f90
index fb201fb..0de7b13 100644
--- a/pppack/cspint.f90
+++ b/pppack/cspint.f90
@@ -1,214 +1,214 @@
 !>
 !> @file cspint.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine cspint ( ftab, xtab, ntab, a, b, y, e, work, result, ind )
 
 !*************************************************************************
 !
 !! CSPINT estimates an integral using a spline interpolant.
 !
 !  Discussion:
 !
 !    CSPINT estimates the integral from A to B of F(X) by
 !    computing the natural spline S(X) that interpolates to F
 !    and integrating that exactly.
 !
 !    F is supplied to the routine in the form of tabulated data.
 !
 !    Other output from the program includes the definite integral
 !    from X(1) to X(I) of the spline, and the coefficients
 !    necessary for the user to evaluate the spline outside of
 !    this routine.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) FTAB(NTAB), contains the tabulated values
 !    of the functions, FTAB(I)=F(XTAB(I)).
 !
 !    Input, real ( kind = 8 ) XTAB(NTAB), contains the points at
 !    which the function was evaluated.  The XTAB's must be
 !    distinct and in ascending order.
 !
 !    Input, integer NTAB, the number of entries in FTAB
 !    and XTAB.  NTAB must be at least 3.
 !
 !    Input, real ( kind = 8 ) A, lower limit of integration.
 !
 !    Input, real ( kind = 8 ) B, upper limit of integration.
 !
 !    Output, real ( kind = 8 ) Y(3,NTAB), will contain the coefficients
 !    of the interpolating natural spline over each subinterval.
 !
 !    For XTAB(I) < = X  <= XTAB(I+1),
 !
 !      S(X) = FTAB(I) + Y(1,I)*(X-XTAB(I)) + Y(2,I)*(X-XTAB(I))**2
 !                 + Y(3,I)*(X-XTAB(I))**3
 !
 !    Output, real ( kind = 8 ) E(NTAB), E(I)=the definite integral
 !    from XTAB(1) to XTAB(I) of S(X).
 !
 !    Workspace, real ( kind = 8 ) WORK(NTAB).
 !
 !    Output, real ( kind = 8 ) RESULT, the estimated value of the integral.
 !
 !    Output, integer IND, error flag.
 !    IND=0 if NTAB < 3 or the XTAB's are not distinct and in
 !    ascending order.
 !    IND=1 otherwise.
 !
   implicit none
 
   integer ntab
 
   real ( kind = 8 ) a
   real ( kind = 8 ) b
   real ( kind = 8 ) e(ntab)
   real ( kind = 8 ) ftab(ntab)
   integer i
   integer ind
   integer j
   real ( kind = 8 ) r
   real ( kind = 8 ) result
   real ( kind = 8 ) s
   real ( kind = 8 ) term
   real ( kind = 8 ) u
   real ( kind = 8 ) work(ntab)
   real ( kind = 8 ) xtab(ntab)
   real ( kind = 8 ) y(3,ntab)
 
   ind = 0
 
   if ( ntab < 3 ) then
     write(*,*)' '
     write(*,*)'CSPINT - Fatal error!'
     write(*,*)'  NTAB must be at least 3,'
     write(*,*)'  but your value was NTAB = ',ntab
     stop
   end if
 
   do i = 1, ntab-1
 
     if ( xtab(i+1) <= xtab(i) ) then
       write(*,*)' '
       write(*,*)'CSPINT - Fatal error!'
       write(*,*)'  Interval ',i,' is illegal.'
       write(*,*)'  XTAB(I) =',xtab(i)
       write(*,*)'  XTAB(I+1)=',xtab(i+1)
       stop
     end if
 
   end do
 
   s = 0.0D+00
   do i = 1, ntab-1
     r = ( ftab(i+1) - ftab(i) ) / ( xtab(i+1) - xtab(i) )
     y(2,i) = r - s
     s = r
   end do
 
   result = 0.0D+00
   s = 0.0D+00
   r = 0.0D+00
   y(2,1) = 0.0D+00
   y(2,ntab) = 0.0D+00
 
   do i = 2, ntab-1
     y(2,i) = y(2,i) + r * y(2,i-1)
     work(i) = 2.0D+00 * ( xtab(i-1) - xtab(i+1) ) - r * s
     s = xtab(i+1) - xtab(i)
     r = s / work(i)
   end do
 
   do j = 2, ntab-1
     i = ntab+1-j
     y(2,i) = ((xtab(i+1)-xtab(i))*y(2,i+1)-y(2,i))/work(i)
   end do
 
   do i = 1, ntab-1
     s = xtab(i+1) - xtab(i)
     r = y(2,i+1) - y(2,i)
     y(3,i) = r / s
     y(2,i) = 3.0D+00 * y(2,i)
     y(1,i) = ( ftab(i+1) - ftab(i) ) / s - ( y(2,i) + r ) * s
   end do
 
   e(1) = 0.0D+00
 
   do i = 1, ntab-1
 
     s = xtab(i+1) - xtab(i)
 
     term = ( ( ( y(3,i) * 0.25D+00  * s &
                + y(2,i) / 3.0D+00 ) * s &
                + y(1,i) * 0.5D+00 ) * s + ftab(i) ) * s
 
     e(i+1) = e(i) + term
 
   end do
 !
 !  Determine where the endpoints A and B lie in the mesh of XTAB's.
 !
   r = a
   u = 1.0D+00
 
   do j = 1, 2
 
     if ( r <= xtab(1) ) then
       result = result-u*((r-xtab(1))*y(1,1)* 0.5D+00 + ftab(1))*(r-xtab(1))
     else if ( xtab(ntab) <= r ) then
       result = result-u*(e(ntab)+(r-xtab(ntab))*(ftab(ntab) + 0.5D+00 * &
         (ftab(ntab-1)+(xtab(ntab)-xtab(ntab-1))*y(1,ntab-1))*(r- &
         xtab(ntab))))
     else
       do i = 1, ntab-1
 
         if ( r <= xtab(i+1) ) then
           r = r - xtab(i)
           result = result-u*(e(i)+(((y(3,i)*0.25D+00*r+y(2,i)/3.0D+00)*r &
             +y(1,i) * 0.5D+00 )*r+ftab(i))*r)
           go to 100
         end if
 
       end do
 
     end if
 
   100   continue
 
     u = -1.0D+00
     r = b
 
   end do
 
   ind = 1
 
   return
 end
diff --git a/pppack/cubset.f90 b/pppack/cubset.f90
index 567b9de..9256b62 100644
--- a/pppack/cubset.f90
+++ b/pppack/cubset.f90
@@ -1,106 +1,106 @@
 !>
 !> @file cubset.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine cubset ( tau, c, n, ibcbeg, ibcend )
 
 !*******************************************************************************
 !
 !! CUBSET sets up a simple cubic spline interpolant.
 !
 !  WARNING: IBCBEG and IBCEND are not set up yet.
 !
 !  A tridiagonal linear system for the unknown slopes S(I) of
 !  F at TAU(I), I=1,..., N, is generated and then solved by Gauss
 !  elimination, with S(I) ending up in C(2,I), for all I.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
 !    the data points.  The entries of TAU are assumed to be
 !    strictly increasing.
 !
 !    Input, integer N, the number of data points.  N is
 !    assumed to be at least 2.
 !
 !    Input/output, real ( kind = 8 ) C(4,N).
 !    On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1)
 !    or C(2,N) should have been set to the desired derivative
 !    values, as described further under IBCBEG and IBCEND.
 !    On output, C contains the polynomial coefficients of
 !    the cubic interpolating spline with interior knots
 !    TAU(2) through TAU(N-1).
 !    In the interval interval (TAU(I), TAU(I+1)), the spline
 !    F is given by F(X) =
 !      C(1,I) +
 !      C(2,I) * ( X - TAU(I) ) +
 !      C(3,I) * ( X - TAU(I) )**2 +
 !      C(4,I) * ( X - TAU(I) )**3
 !
 !  IBCBEG,
 !  IBCEND Input, integer IBCBEG, IBCEND, boundary condition
 !         indicators.
 !
 !         IBCBEG=0 means no boundary condition at TAU(1) is given.
 !         In this case, the "not-a-knot condition" is used.  That
 !         is, the jump in the third derivative across TAU(2) is
 !         forced to zero.  Thus the first and the second cubic
 !         polynomial pieces are made to coincide.
 !
 !         IBCBEG=1 means that the slope at TAU(1) is to equal the
 !         input value C(2,1).
 !
 !         IBCBEG=2 means that the second derivative at TAU(1) is
 !         to equal C(2,1).
 !
 !         IBCEND=0, 1, or 2 has analogous meaning concerning the
 !         boundary condition at TAU(N), with the additional
 !         information taken from C(2,N).
 !
   implicit none
 
   integer n
 
   real ( kind = 8 ) c(4,n)
   integer ibcbeg
   integer ibcend
   real ( kind = 8 ) tau(n)
 !
 !  Solve for the slopes at internal nodes.
 !
   call cubslo ( tau, c, n )
 !
 !  Now compute the quadratic and cubic coefficients used in the
 !  piecewise polynomial representation.
 !
   call spline_hermite_set ( n, tau, c )
 
   return
 end
diff --git a/pppack/cubslo.f90 b/pppack/cubslo.f90
index 659792d..ffe3736 100644
--- a/pppack/cubslo.f90
+++ b/pppack/cubslo.f90
@@ -1,111 +1,111 @@
 !>
 !> @file cubslo.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine cubslo ( tau, c, n )
 
 !*******************************************************************************
 !
 !! CUBSLO solves for slopes defining a cubic spline.
 !
 !  Discussion:
 !
 !    A tridiagonal linear system for the unknown slopes S(I) of
 !    F at TAU(I), I=1,..., N, is generated and then solved by Gauss
 !    elimination, with S(I) ending up in C(2,I), for all I.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
 !    the data points.  The entries of TAU are assumed to be
 !    strictly increasing.
 !
 !    Input, integer N, the number of data points.  N is
 !    assumed to be at least 2.
 !
 !    Input/output, real ( kind = 8 ) C(4,N).
 !    On input, C(1,I) contains the function value at TAU(I),
 !    for I = 1 to N.
 !    C(2,1) contains the slope at TAU(1) and C(2,N) contains
 !    the slope at TAU(N).
 !    On output, the intermediate slopes at TAU(I) have been
 !    stored in C(2,I), for I = 2 to N-1.
 !
   implicit none
 
   integer n
 
   real ( kind = 8 ) c(4,n)
   real ( kind = 8 ) g
   integer i
   integer ibcbeg
   integer ibcend
   real ( kind = 8 ) tau(n)
 !
 !  Set up the right hand side of the linear system.
 !  C(2,1) and C(2,N) are presumably already set.
 !
   do i = 2, n-1
     c(2,i) = 3.0D+00 * ( &
       ( tau(i) - tau(i-1) ) * ( c(1,i+1) - c(1,i) ) / ( tau(i+1) - tau(i) ) + &
       ( tau(i+1) - tau(i) ) * ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) )
   end do
 !
 !  Set the diagonal coefficients.
 !
   c(4,1) = 1.0D+00
   do i = 2, n-1
     c(4,i) = 2.0D+00 * ( tau(i+1) - tau(i-1) )
   end do
   c(4,n) = 1.0D+00
 !
 !  Set the off-diagonal coefficients.
 !
   c(3,1) = 0.0D+00
   do i = 2, n
     c(3,i) = tau(i) - tau(i-1)
   end do
 !
 !  Forward elimination.
 !
   do i = 2, n-1
     g = -c(3,i+1) / c(4,i-1)
     c(4,i) = c(4,i) + g * c(3,i-1)
     c(2,i) = c(2,i) + g * c(2,i-1)
   end do
 !
 !  Back substitution for the interior slopes.
 !
   do i = n-1, 2, -1
     c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i)
   end do
 
   return
 end
diff --git a/pppack/cubspl.f90 b/pppack/cubspl.f90
index ffbc652..3973860 100644
--- a/pppack/cubspl.f90
+++ b/pppack/cubspl.f90
@@ -1,282 +1,282 @@
 !>
 !> @file cubspl.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine cubspl ( tau, c, n, ibcbeg, ibcend )
 
 !*******************************************************************************
 !
 !! CUBSPL defines an interpolatory cubic spline.
 !
 !  Discussion:
 !
 !    A tridiagonal linear system for the unknown slopes S(I) of
 !    F at TAU(I), I=1,..., N, is generated and then solved by Gauss
 !    elimination, with S(I) ending up in C(2,I), for all I.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
 !    the data points.  The entries of TAU are assumed to be
 !    strictly increasing.
 !
 !    Input, integer N, the number of data points.  N is
 !    assumed to be at least 2.
 !
 !    Input/output, real ( kind = 8 ) C(4,N).
 !    On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1)
 !    or C(2,N) should have been set to the desired derivative
 !    values, as described further under IBCBEG and IBCEND.
 !    On output, C contains the polynomial coefficients of
 !    the cubic interpolating spline with interior knots
 !    TAU(2) through TAU(N-1).
 !
 !    In the interval interval (TAU(I), TAU(I+1)), the spline
 !    F is given by
 !
 !      F(X) =
 !        C(1,I) +
 !        C(2,I) * H +
 !        C(3,I) * H**2 / 2 +
 !        C(4,I) * H**3 / 6.
 !
 !    where H=X-TAU(I).  The routine PPVALU may be used to
 !    evaluate F or its derivatives from TAU, C, L=N-1,
 !    and K=4.
 !
 !    Input, integer IBCBEG, IBCEND, boundary condition indicators.
 !
 !    IBCBEG=0 means no boundary condition at TAU(1) is given.
 !    In this case, the "not-a-knot condition" is used.  That
 !    is, the jump in the third derivative across TAU(2) is
 !    forced to zero.  Thus the first and the second cubic
 !    polynomial pieces are made to coincide.
 !
 !    IBCBEG=1 means the slope at TAU(1) is to equal the
 !    input value C(2,1).
 !
 !    IBCBEG=2 means the second derivative at TAU(1) is
 !    to equal C(2,1).
 !
 !    IBCEND=0, 1, or 2 has analogous meaning concerning the
 !    boundary condition at TAU(N), with the additional
 !    information taken from C(2,N).
 !
   implicit none
 
   integer n
 
   real ( kind = 8 ) c(4,n)
   real ( kind = 8 ) divdf1
   real ( kind = 8 ) divdf3
   real ( kind = 8 ) dtau
   real ( kind = 8 ) g
   integer i
   integer ibcbeg
   integer ibcend
   real ( kind = 8 ) tau(n)
 !
 !  C(3,*) and C(4,*) are used initially for temporary storage.
 !
 !  Store first differences of the TAU sequence in C(3,*).
 !
 !  Store first divided difference of data in C(4,*).
 !
   do i = 2, n
     c(3,i) = tau(i) - tau(i-1)
   end do
 
   do i = 2, n
     c(4,i) = ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) )
   end do
 !
 !  Construct the first equation from the boundary condition
 !  at the left endpoint, of the form:
 !
 !    C(4,1)*S(1) + C(3,1)*S(2) = C(2,1)
 !
 !  IBCBEG = 0: Not-a-knot
 !
   if ( ibcbeg == 0 ) then
 
     if ( n <= 2 ) then
       c(4,1) = 1.0D+00
       c(3,1) = 1.0D+00
       c(2,1) = 2.0D+00 * c(4,2)
       go to 120
     end if
 
     c(4,1) = c(3,3)
     c(3,1) = c(3,2) + c(3,3)
     c(2,1) = ( ( c(3,2) + 2.0D+00 * c(3,1) ) * c(4,2) * c(3,3) &
       + c(3,2)**2 * c(4,3) ) / c(3,1)
 !
 !  IBCBEG = 1: derivative specified.
 !
   else if ( ibcbeg == 1 ) then
 
     c(4,1) = 1.0D+00
     c(3,1) = 0.0D+00
 
     if ( n == 2 ) then
       go to 120
     end if
 !
 !  Second derivative prescribed at left end.
 !
   else
 
     c(4,1) = 2.0D+00
     c(3,1) = 1.0D+00
     c(2,1) = 3.0D+00 * c(4,2) - c(3,2) / 2.0D+00 * c(2,1)
 
     if ( n == 2 ) then
       go to 120
     end if
 
   end if
 !
 !  If there are interior knots, generate the corresponding
 !  equations and carry out the forward pass of Gauss elimination,
 !  after which the I-th equation reads:
 !
 !    C(4,I) * S(I) + C(3,I) * S(I+1) = C(2,I).
 !
   do i = 2, n-1
     g = -c(3,i+1) / c(4,i-1)
     c(2,i) = g * c(2,i-1) + 3.0D+00 * ( c(3,i) * c(4,i+1) + c(3,i+1) * c(4,i) )
     c(4,i) = g * c(3,i-1) + 2.0D+00 * ( c(3,i) + c(3,i+1))
   end do
 !
 !  Construct the last equation from the second boundary condition, of
 !  the form
 !
 !    -G * C(4,N-1) * S(N-1) + C(4,N) * S(N) = C(2,N)
 !
 !  If slope is prescribed at right end, one can go directly to
 !  back-substitution, since the C array happens to be set up just
 !  right for it at this point.
 !
   if ( ibcend == 1 ) then
     go to 160
   end if
 
   if ( 1 < ibcend ) then
     go to 110
   end if
 
 90    continue
 !
 !  Not-a-knot and 3 <= N, and either 3 < N or also not-a-knot
 !  at left end point.
 !
   if ( n /= 3 .or. ibcbeg /= 0 ) then
     g = c(3,n-1) + c(3,n)
     c(2,n) = ( ( c(3,n) + 2.0D+00 * g ) * c(4,n) * c(3,n-1) + c(3,n)**2 &
       * ( c(1,n-1) - c(1,n-2) ) / c(3,n-1) ) / g
     g = - g / c(4,n-1)
     c(4,n) = c(3,n-1)
     c(4,n) = c(4,n) + g * c(3,n-1)
     c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n)
     go to 160
   end if
 !
 !  N=3 and not-a-knot also at left.
 !
 100   continue
 
   c(2,n) = 2.0D+00 * c(4,n)
   c(4,n) = 1.0D+00
   g = -1.0D+00 / c(4,n-1)
   c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
   c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n)
   go to 160
 !
 !  IBCEND = 2: Second derivative prescribed at right endpoint.
 !
 110   continue
 
   c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n)
   c(4,n) = 2.0D+00
   g = -1.0D+00 / c(4,n-1)
   c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
   c(2,n) = ( g * c(2,n-1)+c(2,n))/c(4,n)
   go to 160
 !
 !  N = 2.
 !
 120   continue
 
   if ( ibcend == 2  ) then
 
     c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n)
     c(4,n) = 2.0D+00
     g = -1.0D+00 / c(4,n-1)
     c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
     c(2,n) = (g*c(2,n-1)+c(2,n)) / c(4,n)
 
   else if ( ibcend == 0 .and. ibcbeg /= 0 ) then
 
     c(2,n) = 2.0D+00 * c(4,n)
     c(4,n) = 1.0D+00
     g = -1.0D+00 / c(4,n-1)
     c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
     c(2,n) = (g*c(2,n-1)+c(2,n))/c(4,n)
 
   else if ( ibcend == 0 .and. ibcbeg == 0 ) then
 
     c(2,n) = c(4,n)
 
   end if
 !
 !  Back solve the upper triangular system
 !    C(4,I) * S(I) + C(3,I) * S(I+1) = B(I)
 !  for the slopes C(2,I), given that S(N) is already known.
 !
 160   continue
 
   do i = n-1, 1, -1
     c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i)
   end do
 !
 !  Generate cubic coefficients in each interval, that is, the
 !  derivatives at its left endpoint, from value and slope at its
 !  endpoints.
 !
   do i = 2, n
     dtau = c(3,i)
     divdf1 = ( c(1,i) - c(1,i-1) ) / dtau
     divdf3 = c(2,i-1) + c(2,i) - 2.0D+00 * divdf1
     c(3,i-1) = 2.0D+00 * ( divdf1 - c(2,i-1) - divdf3 ) / dtau
     c(4,i-1) = 6.0D+00 * divdf3 / dtau**2
   end do
 
   return
 end
diff --git a/pppack/cwidth.f90 b/pppack/cwidth.f90
index cdca694..cd8d383 100644
--- a/pppack/cwidth.f90
+++ b/pppack/cwidth.f90
@@ -1,351 +1,351 @@
 !>
 !> @file cwidth.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine cwidth ( w, b, nequ, ncols, integs, nbloks, d, x, iflag )
 
 !*************************************************************************
 !
 !! CWIDTH solves an almost block diagonal linear system.
 !
 !  Discussion:
 !
 !    This routine is a variation of the theme in the algorithm bandet1
 !    by Martin and Wilkinson (numer.math. 9(1976)279-307).  It solves
 !    the linear system
 !      A*X = B
 !    of NEQU equations in case A is almost block diagonal with all
 !    blocks having NCOLS columns using no more storage than it takes to
 !    store the interesting part of A.  Such systems occur in the determ-
 !    ination of the b-spline coefficients of a spline approximation.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !  w     on input, a two-dimensional array of size (nequ,ncols) contain-
 !        ing the interesting part of the almost block diagonal coeffici-
 !        ent matrix  a (see description and example below). the array
 !        integs  describes the storage scheme.
 !        on output, w  contains the upper triangular factor  u  of the
 !        lu factorization of a possibly permuted version of  a . in par-
 !        ticular, the determinant of  a  could now be found as
 !            iflag*w(1,1)*w(2,1)* ... * w(nequ,1)  .
 !
 !  b     on input, the right side of the linear system, of length  nequ.
 !        the contents of  b  are changed during execution.
 !
 !    Input, integer NEQU, the number of equations.
 !
 !    Input, integer NCOLS, the block width, that is, the number of
 !    columns in each block.
 !
 !  integs integer array, of size (2,nequ), describing the block
 !         structure of  a .
 !         integs(1,i)=no. of rows in block i              = nrow
 !         integs(2,i)=no. of elimination steps in block i
 !                     =overhang over next block             = last
 !  nbloks number of blocks
 !
 !  d      work array, to contain row sizes . if storage is scarce, the
 !         array  x  could be used in the calling sequence for  d .
 !
 !  x      on output, contains computed solution (if iflag /= 0), of
 !         length  nequ .
 !
 !  iflag  on output, integer
 !        =(-1)**(no.of interchanges during elimination)
 !                if  a  is invertible
 !        = 0   if  a  is singular
 !
 !  block structure of  a
 !
 !  the interesting part of  a  is taken to consist of  nbloks  con-
 !  secutive blocks, with the i-th block made up of  nrowi=integs(1,i)
 !  consecutive rows and  ncols  consecutive columns of  a , and with
 !  the first  lasti=integs(2,i) columns to the left of the next block.
 !  these blocks are stored consecutively in the workarray  w .
 !
 !  for example, here is an 11th order matrix and its arrangement in
 !  the workarray  w . (the interesting entries of  a  are indicated by
 !  their row and column index modulo 10.)
 !
 !                  ---   a   ---                          ---   w   ---
 !
 !                     nrow1=3
 !          11 12 13 14                                     11 12 13 14
 !          21 22 23 24                                     21 22 23 24
 !          31 32 33 34      nrow2=2                        31 32 33 34
 !   last1=2      43 44 45 46                               43 44 45 46
 !                53 54 55 56         nrow3=3               53 54 55 56
 !         last2=3         66 67 68 69                      66 67 68 69
 !                         76 77 78 79                      76 77 78 79
 !                         86 87 88 89   nrow4=1            86 87 88 89
 !                  last3=1   97 98 99 90   nrow5=2         97 98 99 90
 !                     last4=1   08 09 00 01                08 09 00 01
 !                               18 19 10 11                18 19 10 11
 !                        last5=4
 !
 !  for this interpretation of  a  as an almost block diagonal matrix,
 !  we have  nbloks=5 , and the integs array is
 !
 !                        i= 1   2   3   4   5
 !                  k=
 !  integs(k,i)=      1      3   2   3   1   2
 !                     2      2   3   1   1   4
 !
 !
 !  Method:
 !
 !  gauss elimination with scaled partial pivoting is used, but mult-
 !  ipliers are  n o t  s a v e d  in order to save storage. rather, the
 !  right side is operated on during elimination.  the two parameters
 !                  i p v t e q   and  l a s t e q
 !  are used to keep track of the action.  ipvteq is the index of the
 !  variable to be eliminated next, from equations  ipvteq+1,...,lasteq,
 !  using equation  ipvteq (possibly after an interchange) as the pivot
 !  equation. the entries in the pivot column are  a l w a y s  in column
 !  1 of  w . this is accomplished by putting the entries in rows
 !  ipvteq+1,...,lasteq  revised by the elimination of the  ipvteq-th
 !  variable one to the left in  w . in this way, the columns of the
 !  equations in a given block (as stored in  w ) will be aligned with
 !  those of the next block at the moment when these next equations be-
 !  come involved in the elimination process.
 !
 !  thus, for the above example, the first elimination steps proceed
 !  as follows.
 !
 !  *11 12 13 14    11 12 13 14    11 12 13 14    11 12 13 14
 !  *21 22 23 24   *22 23 24       22 23 24       22 23 24
 !  *31 32 33 34   *32 33 34      *33 34          33 34
 !   43 44 45 46    43 44 45 46   *43 44 45 46   *44 45 46        etc.
 !   53 54 55 56    53 54 55 56   *53 54 55 56   *54 55 56
 !   66 67 68 69    66 67 68 69    66 67 68 69    66 67 68 69
 !        .              .              .              .
 !
 !  In all other respects, the procedure is standard, including the
 !  scaled partial pivoting.
 !
   implicit none
 
   integer nbloks
   integer ncols
   integer nequ
 
   real ( kind = 8 ) awi1od
   real ( kind = 8 ) b(nequ)
   real ( kind = 8 ) colmax
   real ( kind = 8 ) d(nequ)
   integer i
   integer icount
   integer iflag
   integer ii
   integer integs(2,nbloks)
   integer ipvteq
   integer ipvtp1
   integer istar
   integer j
   integer jmax
   integer lastcl
   integer lasteq
   integer lasti
   integer nexteq
   integer nrowad
   real ( kind = 8 ) ratio
   real ( kind = 8 ) rowmax
   real ( kind = 8 ) sum1
   real ( kind = 8 ) temp
   real ( kind = 8 ) w(nequ,ncols)
   real ( kind = 8 ) x(nequ)
 
   iflag = 1
   ipvteq = 0
   lasteq = 0
 !
 !  The I loop runs over the blocks.
 !
   do i = 1, nbloks
 !
 !  The equations for the current block are added to those current-
 !  ly involved in the elimination process, by increasing  lasteq
 !  by  integs(1,i) after the rowsize of these equations has been
 !  recorded in the array D.
 !
     nrowad = integs(1,i)
 
     do icount = 1, nrowad
 
       nexteq = lasteq + icount
 
       rowmax = 0.0D+00
       do j = 1, ncols
         rowmax = max ( rowmax, abs ( w(nexteq,j) ) )
       end do
 
       if ( rowmax == 0.0D+00 ) then
         go to 150
       end if
 
       d(nexteq) = rowmax
 
     end do
 
     lasteq = lasteq + nrowad
 !
 !  There will be  lasti=integs(2,i)  elimination steps before
 !  the equations in the next block become involved. further,
 !  l a s t c l  records the number of columns involved in the cur-
 !  rent elimination step. it starts equal to  ncols  when a block
 !  first becomes involved and then drops by one after each elim-
 !  ination step.
 !
     lastcl = ncols
     lasti = integs(2,i)
 
     do icount = 1, lasti
 
       ipvteq = ipvteq+1
 
       if ( ipvteq < lasteq ) then
         go to 30
       end if
 
       if ( d(ipvteq) < abs ( w(ipvteq,1)) + d(ipvteq) ) then
         go to 100
       end if
 
       go to 150
 !
 !  Determine the smallest ISTAR in  (ipvteq,lasteq)  for
 !  which  abs(w(istar,1))/d(istar)  is as large as possible, and
 !  interchange equations  ipvteq  and  istar  in case  ipvteq
 !  < istar .
 !
    30     continue
 
       colmax = abs(w(ipvteq,1)) / d(ipvteq)
       istar = ipvteq
       ipvtp1 = ipvteq+1
 
       do ii = ipvtp1, lasteq
         awi1od = abs(w(ii,1)) / d(ii)
         if ( colmax < awi1od ) then
           colmax = awi1od
           istar = ii
         end if
       end do
 
       if ( abs(w(istar,1))+d(istar) == d(istar) ) then
         go to 150
       end if
 
       if ( istar == ipvteq ) then
         go to 60
       end if
 
       iflag = -iflag
 
       temp = d(istar)
       d(istar) = d(ipvteq)
       d(ipvteq) = temp
 
       temp = b(istar)
       b(istar) = b(ipvteq)
       b(ipvteq) = temp
 
       do j = 1, lastcl
         temp = w(istar,j)
         w(istar,j) = w(ipvteq,j)
         w(ipvteq,j) = temp
       end do
 !
 !  Subtract the appropriate multiple of equation  ipvteq  from
 !  equations  ipvteq+1,...,lasteq to make the coefficient of the
 !  ipvteq-th unknown (presently in column 1 of  w ) zero, but
 !  store the new coefficients in  w  one to the left from the old.
 !
    60     continue
 
       do ii = ipvtp1, lasteq
 
         ratio = w(ii,1)/w(ipvteq,1)
         do j = 2, lastcl
           w(ii,j-1) = w(ii,j)-ratio*w(ipvteq,j)
         end do
         w(ii,lastcl) = 0.0D+00
         b(ii) = b(ii)-ratio*b(ipvteq)
 
       end do
 
       lastcl = lastcl-1
 
     end do
 
 100     continue
 
   end do
 !
 !  At this point, W and B contain an upper triangular linear system
 !  equivalent to the original one, with  w(i,j) containing entry
 !  (i, i-1+j ) of the coefficient matrix. solve this system by backsub-
 !  stitution, taking into account its block structure.
 !
 !  i-loop over the blocks, in reverse order
 !
   i = nbloks
 
   110 continue
 
   lasti = integs(2,i)
   jmax = ncols-lasti
 
   do icount = 1, lasti
 
     sum1 = 0.0D+00
     do j = 1, jmax
       sum1 = sum1 + x(ipvteq+j) * w(ipvteq,j+1)
     end do
 
     x(ipvteq) = ( b(ipvteq) - sum1 ) / w(ipvteq,1)
     jmax = jmax+1
     ipvteq = ipvteq-1
 
   end do
 
   i = i-1
   if ( 0 < i ) then
     go to 110
   end if
 
   return
 
   150 continue
 
   iflag = 0
   return
 end
diff --git a/pppack/difequ.f90 b/pppack/difequ.f90
index 51a61f3..48ef91e 100644
--- a/pppack/difequ.f90
+++ b/pppack/difequ.f90
@@ -1,185 +1,185 @@
 !>
 !> @file difequ.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine difequ ( mode, xx, v )
 
 !*************************************************************************
 !
 !! DIFEQU returns information about a differential equation.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, integer MODE, an integer indicating the task to be performed.
 !    1, initialization
 !    2, evaluate  de  at  xx
 !    3, specify the next side condition
 !    4, analyze the approximation
 !
 !    Input, real ( kind = 8 ) XX, a point at which information is wanted
 !
 !    Output, real ( kind = 8 ) V, depends on the MODE.  See comments below
 !
   implicit none
 
   integer, parameter :: npiece = 100
   integer, parameter :: ncoef = 2000
 
   real ( kind = 8 ) break
   real ( kind = 8 ) coef
   real ( kind = 8 ) eps
   real ( kind = 8 ) ep1
   real ( kind = 8 ) ep2
   real ( kind = 8 ) error
   real ( kind = 8 ) factor
   integer i
   integer iside
   integer itermx
   integer k
   integer kpm
   integer l
   integer m
   integer mode
   real ( kind = 8 ) rho
   real ( kind = 8 ) s2ovep
   real ( kind = 8 ) solutn
   real ( kind = 8 ) un
   real ( kind = 8 ) v(20)
   real ( kind = 8 ) value
   real ( kind = 8 ) x
   real ( kind = 8 ) xside
   real ( kind = 8 ) xx
 
   common /approx/ break(npiece),coef(ncoef),l,kpm
   common /side/ m,iside,xside(10)
   common /other/ itermx,k,rho(19)
 !
 !  This sample of DIFEQU is for the example in chapter xv.  It is a
 !  nonlinear second order two point boundary value problem.
 !
   go to (10,50,60,110), mode
 !
 !  Initialize everything,  Set the order M of the differential equation,
 !  the nondecreasing sequence xside(i),i=1,...,m, of points at which side
 !  conditions are given and anything else necessary.
 !
    10 continue
 
   m = 2
   xside(1) = 0.0D+00
   xside(2) = 1.0D+00
 !
 !  Print out heading.
 !
   write ( *, * ) ' '
   write ( *, * ) '  Carrier''s nonlinear perturb. problem'
   write ( *, * ) ' '
 
   eps = 0.005D+00
   write(*,*)'EPS = ',eps
 !
 !  Set constants used in formula for solution below.
 !
   factor = ( sqrt ( 2.0D+00 ) + sqrt ( 3.0D+00 ) )**2
   s2ovep = sqrt ( 2.0D+00 / eps )
 !
 !  Initial guess for Newton iteration. un(x)=x*x-1.
 !
   l = 1
   break(1) = 0.0D+00
   do i = 1, kpm
     coef(i) = 0.0D+00
   end do
   coef(1) = -1.0D+00
   coef(3) = 2.0D+00
   itermx = 10
   return
 !
 !  Provide value of left side coefficients and right side at  xx .
 !  specifically, at  xx  the dif.equ. reads:
 !
 !    v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2)
 !
 !  in terms of the quantities v(i),i=1,...,m+2, to be computed here.
 !
    50 continue
 
   v(3) = eps
   v(2) = 0.0D+00
   call ppvalu(break,coef,l,kpm,xx,0,un)
   v(1) = 2.0D+00 * un
   v(4) = un**2 + 1.0D+00
   return
 !
 !  provide the M side conditions. these conditions are of the form
 !        v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2)
 !  in terms of the quantities v(i),i=1,...,m+2, to be specified here.
 !  note that v(m+1)=0  for customary side conditions.
 !
    60 continue
 
   v(m+1) = 0.0D+00
   if ( iside == 1 ) then
     v(2) = 1.0D+00
     v(1) = 0.0D+00
     v(4) = 0.0D+00
     iside = iside+1
   else if ( iside == 2 ) then
     v(2) = 0.0D+00
     v(1) = 1.0D+00
     v(4) = 0.0D+00
     iside = iside + 1
   end if
 
   return
 !
 !  Calculate the error near the boundary layer at 1.
 !
   110 continue
 
   write(*,*)' '
   write(*,*)' X, G(X) and G(X)-F(X) at selected points:'
   write(*,*)' '
 
   x = 0.75D+00
 
   do i = 1, 9
     ep1 = exp ( s2ovep * ( 1.0D+00 - x ) ) * factor
     ep2 = exp ( s2ovep * ( 1.0D+00 + x ) ) * factor
     solutn = 12.0D+00 / ( 1.0D+00 + ep1 )**2 * ep1 &
             +12.0D+00 / ( 1.0D+00 + ep2 )**2 * ep2 - 1.0D+00
     call ppvalu(break,coef,l,kpm,x,0,value)
     error = solutn-value
     write ( *, '(1x,3g14.6)' ) x, solutn, error
     x = x+0.03125
   end do
 
   return
 end
diff --git a/pppack/dtblok.f90 b/pppack/dtblok.f90
index 03d5727..1327c65 100644
--- a/pppack/dtblok.f90
+++ b/pppack/dtblok.f90
@@ -1,103 +1,103 @@
 !>
 !> @file dtblok.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine dtblok ( bloks, integs, nbloks, ipivot, iflag, detsgn, detlog )
 
 !*************************************************************************
 !
 !! DTBLOK gets the determinant of an almost block diagonal matrix.
 !
 !  Discussion:
 !
 !    The matrix's PLU factorization must have been obtained
 !    previously by FCBLOK.
 !
 !    The logarithm of the determinant is computed instead of the
 !    determinant itself to avoid the danger of overflow or underflow
 !    inherent in this calculation.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !  bloks, integs, nbloks, ipivot, iflag  are as on return from fcblok.
 !            in particular, iflag=(-1)**(number of interchanges dur-
 !            ing factorization) if successful, otherwise iflag=0.
 !
 !  detsgn  on output, contains the sign of the determinant.
 !
 !  detlog  on output, contains the natural logarithm of the determi-
 !            nant if determinant is not zero. otherwise contains 0.
 !
   implicit none
 
   integer nbloks
 
   real ( kind = 8 ) bloks(1)
   real ( kind = 8 ) detlog
   real ( kind = 8 ) detsgn
   integer i
   integer iflag
   integer index
   integer indexp
   integer integs(3,nbloks)
   integer ip
   integer ipivot(1)
   integer k
   integer last
   integer nrow
 
   detsgn = iflag
   detlog = 0.0D+00
 
   if ( iflag == 0 ) then
     return
   end if
 
   index = 0
   indexp = 0
 
   do i = 1, nbloks
 
     nrow = integs(1,i)
     last = integs(3,i)
 
     do k = 1, last
       ip = index + nrow * (k-1) + ipivot(indexp+k)
       detlog = detlog + log ( abs ( bloks(ip) ) )
       detsgn = detsgn * sign ( 1.0D+00, bloks(ip) )
     end do
 
     index = nrow*integs(2,i)+index
     indexp = indexp+nrow
 
   end do
 
   return
 end
diff --git a/pppack/eqblok.f90 b/pppack/eqblok.f90
index 411f1de..4476458 100644
--- a/pppack/eqblok.f90
+++ b/pppack/eqblok.f90
@@ -1,192 +1,192 @@
 !>
 !> @file eqblok.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine eqblok ( t, n, kpm, work1, work2, bloks, lenblk, integs, &
   nbloks, b )
 
 !*************************************************************************
 !
 !! EQBLOK is to be called in COLLOC.
 !
 !  Method:
 !
 !    Each breakpoint interval gives rise to a block in the linear system.
 !    this block is determined by the K collocation equations in the interval
 !    with the side conditions (if any) in the interval interspersed ap-
 !    propriately, and involves the  kpm  b-splines having the interval in
 !    their support. correspondingly, such a block has  nrow=k+isidel
 !    rows, with  isidel=number of side conditions in this and the prev-
 !    ious intervals, and  ncol=kpm  columns.
 !
 !    Further, because the interior knots have multiplicity  k, we can
 !    carry out (in slvblk)  k  elimination steps in a block before pivot-
 !    ing might involve an equation from the next block. in the last block,
 !    of course, all kpm elimination steps will be carried out (in slvblk).
 !
 !    see the detailed comments in the solveblok package for further in-
 !    formation about the almost block diagonal form used here.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !  input
 !
 !    Input, real ( kind = 8 ) T(N+KPM), the knot sequence.
 !
 !    Input, integer N, the dimension of the approximating spline space,
 !    that is, the order of the linear system to be constructed.
 !
 !    Input, integer KPM, = K + M, the order of the approximating spline.
 !
 !    Input, integer LENBLK, the maximum length of the array BLOKS,
 !    as allowed by the dimension statement in COLLOC.
 !
 !  work  areas
 !
 !  work1    used in  putit, of size (kpm,kpm)
 !  work2    used in  putit, of size (kpm,m+1)
 !
 !  output
 !
 !  bloks    the coefficient matrix of the linear system, stored in al-
 !           most block diagonal form, of size
 !              kpm*sum(integs(1,i) , i=1,...,nbloks)
 !
 !  integs   an integer array, of size (3,nbloks), describing the block
 !           structure.
 !           integs(1,i) = number of rows in block  i
 !           integs(2,i) = number of columns in block  i
 !           integs(3,i) = number of elimination steps which can be
 !                       carried out in block  i  before pivoting might
 !                       bring in an equation from the next block.
 !
 !  nbloks number of blocks, equals number of polynomial pieces
 !
 !  b      the right side of the linear system, stored corresponding to the
 !         almost block diagonal form, of size sum(integs(1,i) , i=1,...,
 !         nbloks).
 !
   implicit none
 
   integer kpm
   integer n
 
   real ( kind = 8 ) b(*)
   real ( kind = 8 ) bloks(*)
   integer i
   integer index
   integer indexb
   integer integs(3,*)
   integer iside
   integer isidel
   integer itermx
   integer k
   integer left
   integer lenblk
   integer m
   integer nbloks
   integer nrow
   real ( kind = 8 ) rho
   real ( kind = 8 ) t(n+kpm)
   real ( kind = 8 ) work1(kpm,kpm)
   real ( kind = 8 ) work2(kpm,*)
   real ( kind = 8 ) xside
 
   common /side/ m,iside,xside(10)
   common /other/ itermx,k,rho(19)
 
   index = 1
   indexb = 1
   i = 0
   iside = 1
 
   do left = kpm, n, k
 
     i = i + 1
 !
 !  Determine integs(.,i)
 !
     integs(2,i) = kpm
 
     if ( n <= left ) then
       integs(3,i) = kpm
       isidel = m
       go to 30
     end if
 
     integs(3,i) = k
 !
 !  At this point,  iside-1  gives the number of side conditions
 !  incorporated so far. adding to this the side conditions in the
 !  current interval gives the number  isidel .
 !
     isidel = iside - 1
 
     do
 
       if ( isidel == m ) then
         exit
       end if
 
       if ( t(left+1) <= xside(isidel+1) ) then
         exit
       end if
 
       isidel = isidel + 1
 
     end do
 
 30  continue
 
     nrow = k + isidel
     integs(1,i) = nrow
 !
 !  The detailed equations for this block are generated and put
 !  together in PUTIT.
 !
     if ( lenblk < index + nrow * kpm - 1 ) then
       write ( *, * ) ' '
       write ( *, * ) 'EQBLOK - Fatal error!'
       write ( *, * ) '  The dimension of BLOKS is too small.'
       write ( *, * ) '  LENBLK = ', lenblk
       stop
     end if
 
     call putit ( t, kpm, left, work1, work2, bloks(index), nrow, b(indexb) )
 
     index = index + nrow * kpm
     indexb = indexb + nrow
 
   end do
 
   nbloks = i
 
   return
 end
diff --git a/pppack/evnnot.f90 b/pppack/evnnot.f90
index 518bf00..bd553fc 100644
--- a/pppack/evnnot.f90
+++ b/pppack/evnnot.f90
@@ -1,85 +1,85 @@
 !>
 !> @file evnnot.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine evnnot ( break, coef, l, k, brknew, lnew, coefg )
 
 !*************************************************************************
 !
 !! EVNNOT is a "fake" version of NEWNOT.
 !
 !  Discussion:
 !
 !    EVNNOT returns LNEW+1 knots in BRKNEW which are
 !    evenly spaced between BREAK(1) and BREAK(L+1).
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) BREAK(L+1), coef, l, k.....contains the
 !    pp-representation of a certain function F of order K.  Specifically,
 !    d**(k-1)f(x)=coef(k,i)  for  break(i) <= x < break(i+1)
 !
 !    Input, integer LNEW, the number of subintervals into which the interval
 !    (a,b) is to be sectioned by the new breakpoint sequence  brknew .
 !
 !    Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoints.
 !
 !    Output, coefg  the coefficient part of the pp-repr.  break, coefg, l, 2
 !    for the monotone p.linear function G with respect to which  brknew  will
 !    be equidistributed.
 !
   implicit none
 
   integer k
   integer l
   integer lnew
 
   real ( kind = 8 ) break(l+1)
   real ( kind = 8 ) brknew(lnew+1)
   real ( kind = 8 ) coef(k,l)
   real ( kind = 8 ) coefg(2,l)
   integer i
 
   if ( lnew == 0 ) then
 
     brknew(1) = 0.5D+00 * ( break(1) + break(l+1) )
 
   else
 
     do i = 1, lnew+1
       brknew(i) = ( real ( lnew - i + 1, kind = 8 ) * break(1) &
                   + real (        i - 1, kind = 8 ) * break(l+1) ) &
                   / real ( lnew,         kind = 8 )
     end do
 
   end if
 
   return
 end
diff --git a/pppack/factrb.f90 b/pppack/factrb.f90
index 298312b..fa0a395 100644
--- a/pppack/factrb.f90
+++ b/pppack/factrb.f90
@@ -1,188 +1,188 @@
 !>
 !> @file factrb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine factrb ( w, ipivot, d, nrow, ncol, last, iflag )
 
 !*************************************************************************
 !
 !! FACTRB constructs a partial PLU factorization.
 !
 !  Discussion:
 !
 !    This factorization corresponds to steps 1 through LAST in Gauss
 !    elimination for the matrix W of order ( NROW, NCOL ), using
 !    pivoting of scaled rows.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input/output, real ( kind = 8 ) W(NROW,NCOL); on input, contains the
 !    matrix to be partially factored; on output, the partial factorization.
 !
 !    Output, integer IPIVOT(NROW), contains a record of the pivoting
 !    strategy used; row IPIVOT(I) is used during the I-th elimination step,
 !    for I = 1, ..., LAST.
 !
 !    Workspace, real ( kind = 8 ) D(NROW), used to store the maximum entry
 !    in each row.
 !
 !    Input, integer NROW, the number of rows of W.
 !
 !    Input, integer NCOL, the number of columns of W.
 !
 !    Input, integer LAST, the number of elimination steps to be carried out.
 !
 !    Input/output, integer IFLAG.  On output, equals the input value
 !    times (-1)**(number of row interchanges during the factorization
 !    process), in case no zero pivot was encountered.
 !    Otherwise, iflag=0 on output.
 !
   implicit none
 
   integer ncol
   integer nrow
 
   real ( kind = 8 ) awikdi
   real ( kind = 8 ) colmax
   real ( kind = 8 ) d(nrow)
   integer i
   integer iflag
   integer ipivi
   integer ipivk
   integer ipivot(nrow)
   integer j
   integer k
   integer kp1
   integer last
   real ( kind = 8 ) ratio
   real ( kind = 8 ) rowmax
   real ( kind = 8 ) w(nrow,ncol)
 !
 !  Initialize IPIVOT and D.
 !
   do i = 1, nrow
     ipivot(i) = i
   end do
 
   do i = 1, nrow
 
     rowmax = 0.0D+00
     do j = 1, ncol
       rowmax = max ( rowmax, abs ( w(i,j) ) )
     end do
 
     if ( rowmax == 0.0D+00 ) then
       iflag = 0
       return
     end if
 
     d(i) = rowmax
 
   end do
 !
 !  Gauss elimination with pivoting of scaled rows, loop over k=1,.,last
 !
   k = 1
 !
 !  As pivot row for k-th step, pick among the rows not yet used,
 !  i.e., from rows ipivot(k),...,ipivot(nrow), the one whose k-th
 !  entry (compared to the row size) is largest. then, if this row
 !  does not turn out to be row ipivot(k), redefine ipivot(k) ap-
 !  propriately and record this interchange by changing the sign
 !  of IFLAG.
 !
    30 continue
 
   ipivk = ipivot(k)
 
   if ( k == nrow ) then
     if ( abs(w(ipivk,nrow))+d(ipivk) <= d(ipivk) ) then
       iflag = 0
     end if
     return
   end if
 
   j = k
   kp1 = k+1
   colmax = abs(w(ipivk,k))/d(ipivk)
 !
 !  Find the largest pivot
 !
   do i = kp1, nrow
     ipivi = ipivot(i)
     awikdi = abs(w(ipivi,k)) / d(ipivi)
     if ( colmax < awikdi ) then
       colmax = awikdi
       j = i
     end if
   end do
 
   if ( j /= k ) then
     ipivk = ipivot(j)
     ipivot(j) = ipivot(k)
     ipivot(k) = ipivk
     iflag = -iflag
   end if
 !
 !  If pivot element is too small in absolute value, declare
 !  matrix to be noninvertible and quit.
 !
   if ( abs(w(ipivk,k))+d(ipivk) <= d(ipivk) ) then
     iflag = 0
     return
   end if
 !
 !  Otherwise, subtract the appropriate multiple of the pivot
 !  row from remaining rows, i.e., the rows ipivot(k+1),...,
 !  ipivot(nrow), to make k-th entry zero. save the multiplier in
 !  its place.
 !
   do i = kp1, nrow
 
     ipivi = ipivot(i)
     w(ipivi,k) = w(ipivi,k)/w(ipivk,k)
 
     ratio = -w(ipivi,k)
     do j = kp1, ncol
       w(ipivi,j) = ratio*w(ipivk,j)+w(ipivi,j)
     end do
 
   end do
 
   k = kp1
 !
 !  Check for having reached the next block.
 !
   if ( k <= last ) then
     go to 30
   end if
 
   return
 end
diff --git a/pppack/fcblok.f90 b/pppack/fcblok.f90
index 72f11d8..5a0c1d9 100644
--- a/pppack/fcblok.f90
+++ b/pppack/fcblok.f90
@@ -1,126 +1,126 @@
 !>
 !> @file fcblok.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine fcblok ( bloks, integs, nbloks, ipivot, scrtch, iflag )
 
 !*************************************************************************
 !
 !! FCBLOK supervises the PLU factorization with pivoting of
 !  scaled rows of the almost block diagonal matrix.
 !
 !    The almost block diagonal matrix is stored in the arrays
 !    BLOKS and INTEGS.
 !
 !    The FACTRB routine carries out steps 1,...,last of gauss
 !    elimination (with pivoting) for an individual block.
 !
 !    The SHIFTB routine shifts the remaining rows to the top of
 !    the next block
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !  bloks   an array that initially contains the almost block diagonal
 !            matrix  a  to be factored, and on return contains the com-
 !            puted factorization of  a .
 !
 !  integs  an integer array describing the block structure of  a .
 !
 !  nbloks  the number of blocks in  a .
 !
 !  ipivot  an integer array of dimension  sum (integs(1,n) ; n=1,
 !            ...,nbloks) which, on return, contains the pivoting stra-
 !            tegy used.
 !
 !  scrtch  work area required, of length  max (integs(1,n) ; n=1,
 !            ...,nbloks).
 !
 !  iflag   output parameter;
 !          =0  in case matrix was found to be singular.
 !            otherwise,
 !          =(-1)**(number of row interchanges during factorization)
 !
   implicit none
 
   integer nbloks
 
   real ( kind = 8 ) bloks(*)
   integer i
   integer iflag
   integer index
   integer indexb
   integer indexn
   integer integs(3,nbloks)
   integer ipivot(*)
   integer last
   integer ncol
   integer nrow
   real ( kind = 8 ) scrtch(*)
 
   iflag = 1
   indexb = 1
   indexn = 1
   i = 1
 !
 !  Loop over the blocks.  i  is loop index
 !
   do
 
     index = indexn
     nrow = integs(1,i)
     ncol = integs(2,i)
     last = integs(3,i)
 !
 !  Carry out elimination on the I-th block until next block
 !  enters, for columns 1 through LAST of I-th block.
 !
     call factrb ( bloks(index), ipivot(indexb), scrtch, nrow, ncol, &
       last, iflag )
 !
 !  Check for having reached a singular block or the last block.
 !
     if ( iflag == 0 .or. i == nbloks ) then
       exit
     end if
 
     i = i + 1
     indexn = nrow * ncol + index
 !
 !  Put the rest of the I-th block onto the next block.
 !
     call shiftb ( bloks(index), ipivot(indexb), nrow, ncol, last, &
       bloks(indexn), integs(1,i), integs(2,i) )
 
     indexb = indexb + nrow
 
   end do
 
   return
 end
diff --git a/pppack/interv.f90 b/pppack/interv.f90
index 60b9cb1..e2ed195 100644
--- a/pppack/interv.f90
+++ b/pppack/interv.f90
@@ -1,223 +1,223 @@
 !>
 !> @file interv.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine interv ( xt, lxt, x, left, mflag )
 
 !*******************************************************************************
 !
 !! INTERV brackets a real value in an ascending vector of values.
 !
 !  Discussion:
 !
 !    The XT array is a set of increasing values.  The goal of the routine
 !    is to determine the largest index I so that XT(I) <= X.
 !
 !    The routine is designed to be efficient in the common situation
 !    that it is called repeatedly, with X taken from an increasing
 !    or decreasing sequence.
 !
 !    This will happen when a piecewise polynomial is to be graphed.
 !    The first guess for LEFT is therefore taken to be the value
 !    returned at the previous call and stored in the local variable ILO.
 !
 !    A first check ascertains that ILO < LXT.  This is necessary
 !    since the present call may have nothing to do with the previous
 !    call.  Then, if XT(ILO) < = X < XT(ILO+1), we set LEFT=ILO
 !    and are done after just three comparisons.
 !
 !    Otherwise, we repeatedly double the difference ISTEP=IHI-ILO
 !    while also moving ILO and IHI in the direction of X, until
 !      XT(ILO) < = X < XT(IHI)
 !    after which we use bisection to get, in addition, ILO+1=IHI.
 !    LEFT=ILO is then returned.
 !
 !  Modified:
 !
 !    05 February 2004
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) XT(LXT), a nondecreasing sequence of values.
 !
 !    Input, integer LXT, the dimension of XT.
 !
 !    Input, real ( kind = 8 ) X, the point whose location with
 !    respect to the sequence XT is to be determined.
 !
 !    Output, integer LEFT, the index of the bracketing value:
 !      1     if             X  <  XT(1)
 !      I     if   XT(I)  <= X  < XT(I+1)
 !      LXT   if  XT(LXT) <= X
 !
 !    Output, integer MFLAG, indicates whether X lies within the
 !    range of the data.
 !    -1:            X  <  XT(1)
 !     0: XT(I)   <= X  < XT(I+1)
 !    +1: XT(LXT) <= X
 !
   implicit none
 
   integer lxt
 
   integer left
   integer mflag
   integer ihi
   integer, save :: ilo = 1
   integer istep
   integer middle
   real ( kind = 8 ) x
   real ( kind = 8 ) xt(lxt)
 
 !$omp threadprivate(ilo)
 
   ihi = ilo + 1
 
   if ( lxt <= ihi ) then
 
     if ( xt(lxt) <= x ) then
       go to 110
     end if
 
     if ( lxt <= 1 ) then
       mflag = -1
       left = 1
       return
     end if
 
     ilo = lxt - 1
     ihi = lxt
 
   end if
 
   if ( xt(ihi) <= x ) then
     go to 40
   end if
 
   if ( xt(ilo) <= x ) then
     mflag = 0
     left = ilo
     return
   end if
 !
 !  Now X < XT(ILO).  Decrease ILO to capture X.
 !
   istep = 1
 
    31 continue
 
   ihi = ilo
   ilo = ihi - istep
 
   if ( 1 < ilo ) then
     if ( xt(ilo) <= x ) then
       go to 50
     end if
     istep = istep * 2
     go to 31
   end if
 
   ilo = 1
 
   if ( x < xt(1) ) then
     mflag = -1
     left = 1
     return
   end if
 
   go to 50
 !
 !  Now XT(IHI) <= X.  Increase IHI to capture X.
 !
    40 continue
 
   istep = 1
 
    41 continue
 
   ilo = ihi
   ihi = ilo + istep
 
   if ( ihi < lxt ) then
     if ( x < xt(ihi) ) then
       go to 50
     end if
     istep = istep * 2
     go to 41
   end if
 
   if ( xt(lxt) <= x ) then
     go to 110
   end if
 !
 !  Now XT(ILO) < = X < XT(IHI).  Narrow the interval.
 !
   ihi = lxt
 
 50 continue
 
   do
 
     middle = ( ilo + ihi ) / 2
 
     if ( middle == ilo ) then
       mflag = 0
       left = ilo
       return
     end if
 !
 !  It is assumed that MIDDLE = ILO in case IHI = ILO+1.
 !
     if ( xt(middle) <= x ) then
       ilo = middle
     else
       ihi = middle
     end if
 
   end do
 !
 !  Set output and return.
 !
   110 continue
 
   mflag = 1
 
   if ( x == xt(lxt) ) then
     mflag = 0
   end if
 
   do left = lxt, 1, -1
     if ( xt(left) < xt(lxt) ) then
       return
     end if
   end do
 
   return
 end
diff --git a/pppack/knots.f90 b/pppack/knots.f90
index c7be744..574b793 100644
--- a/pppack/knots.f90
+++ b/pppack/knots.f90
@@ -1,103 +1,103 @@
 !>
 !> @file knots.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine knots ( break, l, kpm, t, n )
 
 !*************************************************************************
 !
 !! KNOTS is to be called in COLLOC.
 !
 !  Discussion:
 !
 !    From the given breakpoint sequence BREAK the routine constructs the
 !    knot sequence T so that
 !
 !      SPLINE(K+M,T) = PP(K+M,BREAK)
 !
 !    with M-1 continuous derivatives.  This means that
 !
 !      t(1),...,t(n+kpm) = break(1) kpm times, then break(2),...,
 !        break(l) each  k  times, then, finally, break(l+1) kpm times.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence.
 !
 !    Input, integer L, the number of intervals or pieces.
 !
 !    Input, integer KPM, = K+M, the order of the piecewise polynomial
 !    function or spline.
 !
 !    Output, real ( kind = 8 ) T(N+KPM), the knot sequence.
 !
 !    Output, integer N, = L*K+M = the dimension of SPLINE(K+M,T).
 !
   implicit none
 
   integer kpm
   integer l
   integer n
 
   real ( kind = 8 ) break(l+1)
   integer iside
   integer j
   integer jj
   integer jjj
   integer k
   integer ll
   integer m
   real ( kind = 8 ) t(*)
   real ( kind = 8 ) xside
 
   common /side/ m,iside,xside(10)
 
   k = kpm-m
   n = l*k+m
   jj = n+kpm
   jjj = l+1
 
   do ll = 1, kpm
     t(jj) = break(jjj)
     jj = jj-1
   end do
 
   do j = 1, l
     jjj = jjj-1
     do ll = 1, k
       t(jj) = break(jjj)
       jj = jj-1
     end do
   end do
 
   t(1:kpm) = break(1)
 
   return
 end
diff --git a/pppack/l2appr.f90 b/pppack/l2appr.f90
index 861d3bd..58a6c23 100644
--- a/pppack/l2appr.f90
+++ b/pppack/l2appr.f90
@@ -1,196 +1,196 @@
 !>
 !> @file l2appr.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine l2appr ( t, n, k, q, diag, bcoef )
 
 !*************************************************************************
 !
 !! L2APPR constructs a weighted L2 spline approximation to given data.
 !
 !  Discussion:
 !
 !    The routine constructs the weighted discrete L2-approximation by
 !    splines of order K with knot sequence T(1), ..., T(n+k) to
 !    given data points ( TAU(1:NTAU), GTAU(1:NTAU) ).
 !
 !    The B-spline coefficients BCOEF of the approximating spline are
 !    determined from the normal equations using Cholesky's method.
 !
 !  Method:
 !
 !    The B-spline coefficients of the L2-approximation are determined as the
 !    solution of the normal equations
 !
 !      sum ( (b(i), b(j) ) * bcoef(j) : j=1,...,n) =(b(i),g),
 !                                               i=1, ..., n .
 !    Here, b(i) denotes the i-th B-spline, G denotes the function to
 !    be approximated, and the inner product of two functions F and G
 !    is given by
 !
 !      (f,g)  := sum ( f(tau(i))*g(tau(i))*weight(i) : i=1,...,ntau) .
 !
 !    The arrays TAU and WEIGHT are given in common block
 !    DATA, as is the array GTAU containing the sequence
 !    g(tau(i)), i=1,..., NTAU.
 !
 !    The relevant function values of the B-splines  b(i), i=1,...,n, are
 !    supplied by the subprogram BSPLVB.
 !
 !    The coefficient matrix C, with
 !           c(i,j)  := (b(i), b(j)), i,j=1,...,n,
 !    of the normal equations is symmetric and (2*k-1)-banded, therefore
 !    can be specified by giving its K bands at or below the diagonal.
 !    For i=1,...,n,  we store
 !    (b(i),b(j)) = c(i,j)  in  q(i-j+1,j), j=i,...,min(i+k-1,n)
 !    and the right side
 !    (b(i), g )  in  bcoef(i).
 !
 !    Since B-spline values are most efficiently generated by finding sim-
 !    ultaneously the value of every nonzero B-spline at one point,
 !    the entries of C (i.e., of  Q ), are generated by computing, for
 !    each ll, all the terms involving  tau(ll)  simultaneously and adding
 !    them to all relevant entries.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(N+K), the knot sequence.
 !
 !    Input, integer N, the dimension of the space of splines of order K
 !    with knots t.
 !
 !    Input, integer K, the order.
 !
 !  work arrays
 !
 !    q....a work array of size (at least) k*n. its first  k  rows are used
 !       for the  k  lower diagonals of the gramian matrix  c.
 !
 !    diag.....a work array of length  n  used in bchfac .
 !
 !  input  via  c o m m o n  /data/
 !
 !  ntau.....number of data points
 !  (tau(i),gtau(i)), i=1,...,ntau     are the  ntau  data points to be
 !        fitted .
 !  weight(i), i=1,...,ntau    are the corresponding weights .
 !
 !  output
 !  bcoef(1), ..., bcoef(n)  the b-spline coefficients of the l2-appr.
 !
   implicit none
 
   integer k
   integer n
   integer, parameter :: ntmax = 200
 
   real ( kind = 8 ) bcoef(n)
   real ( kind = 8 ) biatx(k)
   real ( kind = 8 ) diag(n)
   real ( kind = 8 ) dw
   real ( kind = 8 ) gtau
   integer i
   integer j
   integer jj
   integer left
   integer leftmk
   integer ll
   integer mm
   integer ntau
   real ( kind = 8 ) q(k,n)
   real ( kind = 8 ) t(n+k)
   real ( kind = 8 ) tau
   real ( kind = 8 ) totalw
   real ( kind = 8 ) weight
 
   COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau
 
   bcoef(1:n) = 0.0D+00
   q(1:k,1:n) = 0.0D+00
 
   left = k
   leftmk = 0
 
   do ll = 1, ntau
 !
 !  Locate LEFT such that tau(ll) in (t(left),t(left+1)).
 !
     do
 
       if ( left == n ) then
         exit
       end if
 
       if ( tau(ll) < t(left+1) ) then
         exit
       end if
 
       left = left + 1
       leftmk = leftmk + 1
 
     end do
 
     call bsplvb ( t, k, 1, tau(ll), left, biatx )
 !
 !  biatx(mm) contains the value of b(left-k+mm) at tau(ll).
 !  hence, with  dw := biatx(mm)*weight(ll), the number dw*gtau(ll)
 !  is a summand in the inner product
 !     (b(left-k+mm), g)  which goes into  bcoef(left-k+mm)
 !  and the number biatx(jj)*dw is a summand in the inner product
 !     (b(left-k+jj), b(left-k+mm)), into  q(jj-mm+1,left-k+mm)
 !  since  (left-k+jj)-(left-k+mm)+1 = jj - mm + 1 .
 !
     do mm = 1, k
 
       dw = biatx(mm)*weight(ll)
       j = leftmk+mm
       bcoef(j) = dw*gtau(ll)+bcoef(j)
       i = 1
 
       do jj = mm, k
         q(i,j) = biatx(jj)*dw+q(i,j)
         i = i+1
       end do
 
     end do
 
   end do
 !
 !  Construct the Cholesky factorization for C in  q , then
 !  use it to solve the normal equations
 !    c*x = bcoef
 !  for X, and store X in BCOEF.
 !
   call bchfac ( q, k, n, diag )
 
   call bchslv ( q, k, n, bcoef )
 
   return
 end
diff --git a/pppack/l2err.f90 b/pppack/l2err.f90
index 7449380..e25b7ab 100644
--- a/pppack/l2err.f90
+++ b/pppack/l2err.f90
@@ -1,146 +1,146 @@
 !>
 !> @file l2err.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine l2err ( iprfun, ftau, error )
 
 !*************************************************************************
 !
 !! L2ERR computes the errors of an L2 approximation.
 !
 !  Discussion:
 !
 !    This routine computes various errors of the current L2-approximation,
 !    whose piecewise polynomial representation is contained in common
 !    block APPROX, to the given data contained in common block  data.
 !
 !    It prints out the average error errl1, the l2-error errl2, and the
 !    maximum error errmax.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, integer IPRFUN.  If iprfun= 1, the routine prints out
 !    the value of the approximation as well as its error at
 !    every data point.
 !
 !    Output, real ( kind = 8 ) FTAU(NTAU), contains the value of the computed
 !    approximation at each value TAU(1:NTAU).
 !
 !    Output, error(1), ..., error(ntau),  with  error(i)=scale*(g-f)
 !    at tau(i), all i. here, SCALE equals 1. in case
 !    iprfun /= 1 , or the absolute error is greater than 100 some-
 !    where. otherwise, SCALE is such that the maximum of
 !    abs(error))  over all I lies between 10 and 100.  This
 !    makes the printed output more illustrative.
 !
   implicit none
 
   integer, parameter :: lpkmax = 100
   integer, parameter :: ntmax = 200
   integer, parameter :: ltkmax = 2000
 
   integer ntau
 
   real ( kind = 8 ) break
   real ( kind = 8 ) coef
   real ( kind = 8 ) err
   real ( kind = 8 ) errl1
   real ( kind = 8 ) errl2
   real ( kind = 8 ) errmax
   real ( kind = 8 ) error(ntau)
   real ( kind = 8 ) ftau(ntau)
   real ( kind = 8 ) gtau
   integer ie
   integer iprfun
   integer k
   integer l
   integer ll
   real ( kind = 8 ) scale
   real ( kind = 8 ) tau
   real ( kind = 8 ) totalw
   real ( kind = 8 ) weight
 
   COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau
   common /approx/ break(lpkmax),coef(ltkmax),l,k
 
   errl1 = 0.0D+00
   errl2 = 0.0D+00
   errmax = 0.0D+00
 
   do ll = 1, ntau
     call ppvalu(break,coef,l,k,tau(ll),0,ftau(ll))
     error(ll) = gtau(ll)-ftau(ll)
     err = abs(error(ll))
     if ( errmax < err ) then
       errmax = err
     end if
     errl1 = errl1 + err * weight(ll)
     errl2 = errl2 + err**2 * weight(ll)
   end do
 
   errl1 = errl1 / totalw
   errl2 = sqrt ( errl2 / totalw )
 
   write ( *, * ) ' '
   write ( *, * ) ' Least square error =',errl2
   write ( *, * ) ' Average error     =',errl1
   write ( *, * ) ' Maximum error     =',errmax
   write ( *, * ) ' '
 
   if ( iprfun /= 1 ) then
     return
   end if
 !
 !  Scale error curve and print
 !
   ie = 0
   scale = 1.0D+00
 
   if ( errmax < 10.0D+00 ) then
 
     do ie = 1, 9
       scale = scale * 10.0D+00
       if ( 10.0D+00 <= errmax * scale ) then
         exit
       end if
     end do
 
   end if
 
   error(1:ntau) = error(1:ntau) * scale
 
   write(*,60) ie, (ll,tau(ll),ftau(ll),error(ll),ll=1,ntau)
 
    60 format (///14x,'approximation and scaled error curve'/ &
        7x,'data point',7x,'approximation',3x,'deviation x 10**',i1/ &
        (i4, f16.8,f16.8,f17.6))
 
   return
 end
diff --git a/pppack/l2knts.f90 b/pppack/l2knts.f90
index 6c064a6..c771c16 100644
--- a/pppack/l2knts.f90
+++ b/pppack/l2knts.f90
@@ -1,83 +1,83 @@
 !>
 !> @file l2knts.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine l2knts ( break, l, k, t, n )
 
 !*************************************************************************
 !
 !! L2KNTS converts breakpoints to knots.
 !
 !  Discussion:
 !
 !    The breakpoint sequence BREAK is converted into a corresponding
 !    knot sequence T to allow the representation of a piecewise
 !    polynomial function of order K with K-2 continuous derivatives
 !    as a spline of order K with knot sequence T.
 !
 !    This means that
 !    t(1), ..., t(n+k)= break(1) k times, then break(i), i=2,...,l, each
 !    once, then break(l+1) k times.  Therefore,  n=k-1+l.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, integer K, the order.
 !
 !    Input, integer L, the number of polynomial pieces.
 !
 !    Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence.
 !
 !    Output, real ( kind = 8 ) T(N+K), the knot sequence.
 !
 !    Output, integer N, the dimension of the corresponding spline space
 !    of order K.
 !
   implicit none
 
   integer k
   integer l
   integer n
 
   real ( kind = 8 ) break(l+1)
   integer i
   real ( kind = 8 ) t(k-1+l+k)
 
   t(1:k-1) = break(1)
 
   do i = 1, l
     t(k-1+i) = break(i)
   end do
 
   n = k-1+l
 
   t(n+1:n+k) = break(l+1)
 
   return
 end
diff --git a/pppack/newnot.f90 b/pppack/newnot.f90
index f3c556a..436356e 100644
--- a/pppack/newnot.f90
+++ b/pppack/newnot.f90
@@ -1,206 +1,206 @@
 !>
 !> @file newnot.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine newnot ( break, coef, l, k, brknew, lnew, coefg )
 
 !*************************************************************************
 !
 !! NEWNOT returns LNEW+1 knots which are equidistributed on (A,B).
 !
 !  Discussion:
 !
 !    (a,b) = (break(1),break(l+1)) with respect to a certain monotone
 !    function G related to the K-th root of the K-th derivative of the
 !    piecewise polynomial function F whose piecewise polynomial
 !    representation is contained in  break, coef, l, k .
 !
 !  method
 !
 !    The K-th derivative of the given piecewise polynomial function F does
 !    not exist, except perhaps as a linear combination of delta functions.
 !    Nevertheless, we construct a piecewise constant function H with
 !    breakpoint sequence BREAK which is approximately proportional
 !    to abs(d**k(f)).
 !
 !  Specifically, on  (break(i), break(i+1)),
 !
 !     abs(jump at break(i) of pc)    abs(jump at break(i+1) of pc)
 !  h=-------------- + ----------------------------
 !       break(i+1)-break(i-1)         break(i+2) - break(i)
 !
 !  with  pc  the p.constant (k-1)st derivative of  f .
 !      then, the p.linear function  g  is constructed as
 !
 !    g(x) = integral of  h(y)**(1/k)  for  y  from  a  to  x
 !
 !  and its pp coefficients stored in  coefg .
 !
 !  then  brknew  is determined by
 !
 !        brknew(i) = a+g**(-1)((i-1)*step) , i=1,...,lnew+1
 !
 !  where  step=g(b)/lnew  and  (a,b) = (break(1),break(l+1)).
 !
 !  In the event that  pc=d**(k-1)(f) is constant in  (a,b)  and
 !  therefore  h=0 identically,  brknew  is chosen uniformly spaced.
 !
 !  optional  printed  output
 !  coefg.....the pp coefficients of  g  are printed out if  iprint  is set
 !        positive  in data statement below.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, break, coef, l, k.....contains the pp-representation of a certain
 !        function  f  of order  k . specifically,
 !        d**(k-1)f(x)=coef(k,i)  for  break(i) <= x < break(i+1)
 !
 !    Input, lnew.....number of intervals into which the interval (a,b) is to be
 !        sectioned by the new breakpoint sequence  brknew .
 !
 !    Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoint sequence.
 !
 !    Output, coefg.....the coefficient part of the pp-repr.  break, coefg, l, 2
 !        for the monotone p.linear function g with respect to which brknew  will
 !        be equidistributed.
 !
   implicit none
 
   integer k
   integer l
   integer lnew
 
   real ( kind = 8 ) break(l+1)
   real ( kind = 8 ) brknew(lnew+1)
   real ( kind = 8 ) coef(k,l)
   real ( kind = 8 ) coefg(2,l)
   real ( kind = 8 ) dif
   real ( kind = 8 ) difprv
   integer i
   integer, save :: iprint = 0
   integer j
   real ( kind = 8 ) oneovk
   real ( kind = 8 ) step
   real ( kind = 8 ) stepi
 
   brknew(1) = break(1)
   brknew(lnew+1) = break(l+1)
 !
 !  If G is constant, BRKNEW is uniform.
 !
   if ( l <= 1) then
 
     step = (break(l+1)-break(1))/ real ( lnew, kind = 8 )
 
     do i = 2, lnew
       brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step
     end do
 
     return
 
   end if
 !
 !  Construct the continuous piecewise linear function G.
 !
   oneovk = 1.0D+00 / real ( k, kind = 8 )
   coefg(1,1) = 0.0D+00
   difprv = abs(coef(k,2)-coef(k,1))/(break(3)-break(1))
 
   do i = 2, l
     dif = abs(coef(k,i)-coef(k,i-1))/(break(i+1)-break(i-1))
     coefg(2,i-1) = (dif+difprv)**oneovk
     coefg(1,i) = coefg(1,i-1)+coefg(2,i-1)*(break(i)-break(i-1))
     difprv = dif
   end do
 
   coefg(2,l) = ( 2.0D+00 * difprv )**oneovk
 !
 !  step = g(b)/lnew
 !
   step=(coefg(1,l)+coefg(2,l)*(break(l+1)-break(l))) / real ( lnew, kind = 8 )
 
   if ( 0 < iprint ) then
     write(*,20)step,(i,coefg(1,i),coefg(2,i),i=1,l)
   end if
 
    20 format (' step =',e16.7/(i5,2e16.5))
 !
 !  if G is constant, BRKNEW is uniform.
 !
   if ( step <= 0.0D+00 ) then
 
     step = (break(l+1)-break(1)) / real ( lnew, kind = 8 )
 
     do i = 2, lnew
       brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step
     end do
 
     return
 
   end if
 !
 !  for i=2,...,lnew, construct  brknew(i)=a+g**(-1)(stepi),
 !  with  stepi=(i-1)*step .  this requires inversion of the p.lin-
 !  ear function  g .  for this,  j  is found so that
 !    g(break(j)) <= stepi .le. g(break(j+1))
 !  and then
 !    brknew(i) = break(j)+(stepi-g(break(j)))/dg(break(j)) .
 !  the midpoint is chosen if  dg(break(j))=0 .
 !
   j = 1
 
   do i = 2, lnew
 
     stepi = real ( i - 1, kind = 8 ) * step
 
     do
 
       if ( j == l ) then
         exit
       end if
 
       if ( stepi <= coefg(1,j+1) ) then
         exit
       end if
 
       j = j + 1
 
     end do
 
     if ( coefg(2,j) /= 0.0D+00 ) then
       brknew(i) = break(j)+(stepi-coefg(1,j))/coefg(2,j)
     else
       brknew(i) = ( break(j) + break(j+1) ) / 2.0D+00
     end if
 
   end do
 
   return
 end
diff --git a/pppack/ppvalu.f90 b/pppack/ppvalu.f90
index b392355..71a60ee 100644
--- a/pppack/ppvalu.f90
+++ b/pppack/ppvalu.f90
@@ -1,134 +1,134 @@
 !>
 !> @file ppvalu.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine ppvalu ( break, coef, l, k, x, jderiv, value )
 
 !*******************************************************************************
 !
 !! PPVALU evaluates a piecewise polynomial function or its derivative.
 !
 !  Discussion:
 !
 !    PPVALU calculates the value at X of the JDERIV-th derivative of
 !    the piecewise polynomial function F from its piecewise
 !    polynomial representation.
 !
 !    The interval index I, appropriate for X, is found through a
 !    call to INTERV.  The formula for the JDERIV-th derivative
 !    of F is then evaluated by nested multiplication.
 !
 !    The J-th derivative of F is given by:
 !
 !      (d**j)f(x) =
 !        coef(j+1,i) + h * (
 !        coef(j+2,i) + h * (
 !        ...
 !        coef(k-1,i) + h * (
 !        coef(k,i) / (k-j-1) ) / (k-j-2) ... ) / 2 ) / 1
 !
 !    with
 !
 !      H=X-BREAK(I)
 !
 !    and
 !
 !      i = max( 1 , max( j ,  break(j) <= x , 1 <= j <= l ) ).
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) BREAK(L+1), real COEF(*), integer L, for
 !    piecewise polynomial representation of the function F to
 !    be evaluated.
 !
 !    Input, integer K, the order of the polynomial pieces
 !    that make up the function F.  The usual value for
 !    K is 4, signifying a piecewise cubic polynomial.
 !
 !    Input, real ( kind = 8 ) X, the point at which to evaluate F or
 !    of its derivatives.
 !
 !    Input, integer JDERIV, the order of the derivative to be
 !    evaluated.  If JDERIV is 0, then F itself is evaluated,
 !    which is actually the most common case.  It is assumed
 !    that JDERIV is zero or positive.
 !
 !    Output, real ( kind = 8 ) VALUE, the value of the JDERIV-th
 !    derivative of F at X.
 !
   implicit none
 
   integer k
   integer l
 
   real ( kind = 8 ) break(l+1)
   real ( kind = 8 ) coef(k,l)
   real ( kind = 8 ) fmmjdr
   real ( kind = 8 ) h
   integer i
   integer jderiv
   integer m
   integer ndummy
   real ( kind = 8 ) value
   real ( kind = 8 ) x
 
   value = 0.0D+00
 
   fmmjdr = k - jderiv
 !
 !  Derivatives of order K or higher are identically zero.
 !
   if ( k <= jderiv ) then
     return
   end if
 !
 !  Find the index I of the largest breakpoint to the left of X.
 !
   call interv ( break, l+1, x, i, ndummy )
 !
 !  Evaluate the JDERIV-th derivative of the I-th polynomial piece at X.
 !
   h = x - break(i)
   m = k
 
   do
 
     value = ( value / fmmjdr ) * h + coef(m,i)
     m = m - 1
     fmmjdr = fmmjdr - 1.0D+00
 
     if ( fmmjdr <= 0.0D+00 ) then
       exit
     end if
 
   end do
 
   return
 end
diff --git a/pppack/putit.f90 b/pppack/putit.f90
index 28886c2..401b35a 100644
--- a/pppack/putit.f90
+++ b/pppack/putit.f90
@@ -1,178 +1,178 @@
 !>
 !> @file putit.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine putit ( t, kpm, left, scrtch, dbiatx, q, nrow, b )
 
 !*************************************************************************
 !
 !! PUTIT puts together one block of the collocation equation system.
 !
 !  Method:
 !
 !    The K collocation equations for the interval  (t(left),t(left+1))
 !    are constructed with the aid of the subroutine DIFEQU( 2, .,
 !    . ) and interspersed (in order) with the side conditions (if any) in
 !    this interval, using DIFEQU ( 3, ., . )  for the information.
 !
 !    The block Q has  kpm  columns, corresponding to the  kpm  b-
 !    splines of order  kpm  which have the interval (t(left),t(left+1))
 !    in their support. the block's diagonal is part of the diagonal of the
 !    total system.  The first equation in this block not overlapped by the
 !    preceding block is therefore equation LOWROW, with lowrow =
 !    number of side conditions in preceding intervals (or blocks).
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) T(LEFT+KPM), the knot sequence.
 !
 !    Input, integer KPM, the order of the spline.
 !
 !    Input, integer LEFT, indicates the interval of interest, viz the interval
 !    (t(left), t(left+1)).
 !
 !    Input, integer NROW, number of rows in block to be put together
 !
 !    Workspace, scrtch used in bsplvd, of size (kpm,kpm)
 !
 !    Workspace, real ( kind = 8 ) DBIATX(KPM,M+1), derivatives of b-splines,
 !    with dbiatx(j,i+1) containing the i-th derivative of the
 !    j-th b-spline of interest
 !
 !    Output, Q  the block, of size (nrow,kpm).
 !
 !    Output, B  the corresponding piece of the right side, of size (nrow)
 !
   implicit none
 
   integer kpm
   integer nrow
 
   real ( kind = 8 ) b(*)
   real ( kind = 8 ) dbiatx(kpm,*)
   real ( kind = 8 ) dx
   integer i
   integer irow
   integer iside
   integer itermx
   integer j
   integer k
   integer left
   integer ll
   integer lowrow
   integer m
   integer mode
   integer mp1
   real ( kind = 8 ) q(nrow,kpm)
   real ( kind = 8 ) rho
   real ( kind = 8 ) scrtch(*)
   real ( kind = 8 ) sum1
   real ( kind = 8 ) t(*)
   real ( kind = 8 ) v(20)
   real ( kind = 8 ) xm
   real ( kind = 8 ) xside
   real ( kind = 8 ) xx
 
   common /side/ m,iside,xside(10)
   common /other/ itermx,k,rho(19)
 
   mp1 = m + 1
 
   q(1:nrow,1:kpm) = 0.0D+00
 
   xm = ( t(left+1) + t(left) ) / 2.0D+00
   dx = ( t(left+1) - t(left) ) / 2.0D+00
 
   ll = 1
   lowrow = iside
 
   do irow = lowrow, nrow
 
     if ( k < ll ) then
       go to 20
     end if
 
     mode = 2
 !
 !  next collocation point is ...
 !
     xx = xm+dx*rho(ll)
     ll = ll+1
 !
 !  The corresponding collocation equation is next unless the next side
 !  condition occurs at a point at, or to the left of, the next
 !  collocation point.
 !
     if ( m < iside ) then
       go to 30
     end if
 
     if ( xx < xside(iside) ) then
       go to 30
     end if
 
     ll = ll-1
 
    20   continue
 
     mode = 3
     xx = xside(iside)
 
    30   continue
 
     call difequ(mode,xx,v)
 !
 !  The next equation, a collocation equation (mode=2) or a side
 !  condition (mode=3), reads
 !    (*)   (v(m+1)*d**m+v(m)*d**(m-1) +...+ v(1)*d**0)f(xx)=v(m+2)
 !  in terms of the info supplied by  difequ . the corresponding
 !  equation for the b-coefficients of  f  therefore has the left side of
 !  (*), evaluated at each of the  kpm  b-splines having  xx  in
 !  their support, as its  kpm  possibly nonzero coefficients.
 !
     call bsplvd(t,kpm,xx,left,scrtch,dbiatx,mp1)
 
     do j = 1, kpm
 
       sum1 = 0.0D+00
       do i = 1, mp1
         sum1 = sum1 + v(i) * dbiatx(j,i)
       end do
 
       q(irow,j) = sum1
 
     end do
 
     b(irow) = v(m+2)
 
   end do
 
   return
 end
diff --git a/pppack/rvec_print.f90 b/pppack/rvec_print.f90
index e482e0a..a25ae9d 100644
--- a/pppack/rvec_print.f90
+++ b/pppack/rvec_print.f90
@@ -1,83 +1,83 @@
 !>
 !> @file rvec_print.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine rvec_print ( n, a, title )
 
 !*******************************************************************************
 !
 !! RVEC_PRINT prints a real vector.
 !
 !  Discussion:
 !
 !    If all the entries are integers, the data is printed
 !    in integer format.
 !
 !  Modified:
 !
 !    19 November 2002
 !
 !  Author:
 !
 !    John Burkardt
 !
 !  Parameters:
 !
 !    Input, integer N, the number of components of the vector.
 !
 !    Input, real ( kind = 8 ) A(N), the vector to be printed.
 !
 !    Input, character ( len = * ) TITLE, a title to be printed.
 !
   implicit none
 
   integer n
 
   real ( kind = 8 ) a(n)
   integer i
   character ( len = * ) title
 
   if ( 0 < len_trim ( title ) ) then
     write ( *, '(a)' ) ' '
     write ( *, '(a)' ) trim ( title )
   end if
 
   write ( *, '(a)' ) ' '
 
   if ( all ( a(1:n) == aint ( a(1:n) ) ) ) then
     do i = 1, n
       write ( *, '(i6,i6)' ) i, int ( a(i) )
     end do
   else if ( all ( abs ( a(1:n) ) < 1000000.0D+00 ) ) then
     do i = 1, n
       write ( *, '(i6,f14.6)' ) i, a(i)
     end do
   else
     do i = 1, n
       write ( *, '(i6,g14.6)' ) i, a(i)
     end do
   end if
 
   return
 end
diff --git a/pppack/sbblok.f90 b/pppack/sbblok.f90
index e490a42..d3db9e6 100644
--- a/pppack/sbblok.f90
+++ b/pppack/sbblok.f90
@@ -1,106 +1,106 @@
 !>
 !> @file sbblok.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine sbblok ( bloks, integs, nbloks, ipivot, b, x )
 
 !*************************************************************************
 !
 !! SBBLOK solves a linear system that was factored by FCBLOK.
 !
 !  Discussion:
 !
 !    The routine supervises the solution, by forward and backward
 !    substitution, of the linear system
 !
 !      A * x = b
 !
 !    for X, with the PLU factorization of A already generated in FCBLOK.
 !    Individual blocks of equations are solved via SUBFOR and SUBBAK.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    bloks, integs, nbloks, ipivot    are as on return from fcblok.
 !
 !    b       the right side, stored corresponding to the storage of
 !            the equations. see comments in SLVBLK for details.
 !
 !    Output, real ( kind = 8 ) X(*), the solution vector.
 !
   implicit none
 
   integer nbloks
 
   real ( kind = 8 ) b(*)
   real ( kind = 8 ) bloks(*)
   integer i
   integer index
   integer indexb
   integer indexx
   integer integs(3,nbloks)
   integer ipivot(*)
   integer j
   integer last
   integer nbp1
   integer ncol
   integer nrow
   real ( kind = 8 ) x(*)
 !
 !  Forward substitution pass:
 !
   index = 1
   indexb = 1
   indexx = 1
   do i = 1, nbloks
     nrow = integs(1,i)
     last = integs(3,i)
     call subfor(bloks(index),ipivot(indexb),nrow,last,b(indexb),x(indexx))
     index = nrow*integs(2,i)+index
     indexb = indexb+nrow
     indexx = indexx+last
   end do
 !
 !  Back substitution pass.
 !
   nbp1 = nbloks + 1
 
   do j = 1, nbloks
     i = nbp1 - j
     nrow = integs(1,i)
     ncol = integs(2,i)
     last = integs(3,i)
     index = index - nrow * ncol
     indexb = indexb - nrow
     indexx = indexx - last
     call subbak ( bloks(index), ipivot(indexb), nrow, ncol, last, x(indexx) )
   end do
 
   return
 end
diff --git a/pppack/setupq.f90 b/pppack/setupq.f90
index 56aa439..9e771e4 100644
--- a/pppack/setupq.f90
+++ b/pppack/setupq.f90
@@ -1,101 +1,101 @@
 !>
 !> @file setupq.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine setupq ( x, dx, y, npoint, v, qty )
 
 !*************************************************************************
 !
 !! SETUPQ is to be called in SMOOTH.
 !
 !  Discussion:
 !
 !    put  delx=x(.+1)-x(.)  into  v(.,4),
 !    put  the three bands of  q-transp*d  into  v(.,1-3), and
 !    put the three bands of  (d*q)-transp*(d*q)  at and above the diagonal
 !    into  v(.,5-7) .
 !
 !    here,  q is  the tridiagonal matrix of order (npoint-2,npoint)
 !    with general row  1/delx(i) , -1/delx(i)-1/delx(i+1) , 1/delx(i+1)
 !    and   d  is the diagonal matrix  with general row  dx(i) .
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
   implicit none
 
   integer npoint
 
   real ( kind = 8 ) diff
   real ( kind = 8 ) dx(npoint)
   integer i
   real ( kind = 8 ) prev
   real ( kind = 8 ) qty(npoint)
   real ( kind = 8 ) v(npoint,7)
   real ( kind = 8 ) x(npoint)
   real ( kind = 8 ) y(npoint)
 
   v(1,4) = x(2)-x(1)
 
   do i = 2, npoint-1
     v(i,4) = x(i+1) - x(i)
     v(i,1) = dx(i-1) / v(i-1,4)
     v(i,2) = -dx(i) / v(i,4) - dx(i) / v(i-1,4)
     v(i,3) = dx(i+1) / v(i,4)
   end do
 
   v(npoint,1) = 0.0D+00
   do i = 2, npoint-1
     v(i,5) = v(i,1)**2 + v(i,2)**2 + v(i,3)**2
   end do
 
   do i = 3, npoint-1
     v(i-1,6) = v(i-1,2)*v(i,1)+v(i-1,3)*v(i,2)
   end do
 
   v(npoint-1,6) = 0.0D+00
 
   do i = 4, npoint-1
     v(i-2,7) = v(i-2,3) * v(i,1)
   end do
 
   v(npoint-2,7) = 0.0D+00
   v(npoint-1,7) = 0.0D+00
 !
 !  Construct  q-transp. * y  in QTY.
 !
   prev = (y(2)-y(1)) / v(1,4)
   do i = 2, npoint-1
     diff = (y(i+1)-y(i)) / v(i,4)
     qty(i) = diff-prev
     prev = diff
   end do
 
   return
 end
diff --git a/pppack/shiftb.f90 b/pppack/shiftb.f90
index 0ddcd79..8e51c0c 100644
--- a/pppack/shiftb.f90
+++ b/pppack/shiftb.f90
@@ -1,107 +1,107 @@
 !>
 !> @file shiftb.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine shiftb ( ai, ipivot, nrowi, ncoli, last, ai1, nrowi1, ncoli1 )
 
 !*************************************************************************
 !
 !! SHIFTB shifts the rows in current block, ai, not used as pivot
 !  rows, if any, i.e., rows ipivot(last+1),...,ipivot(nrowi),
 !  onto the first mmax=nrow-last rows of the next block, ai1,
 !  with column last+j of ai  going to column j ,
 !  for j=1,...,jmax=ncoli-last. the remaining columns of these
 !  rows of ai1 are zeroed out.
 !
 !                             picture
 !
 !       original situation after         results in a new block i+1
 !       last=2 columns have been       created and ready to be
 !       done in factrb (assuming no      factored by next factrb call.
 !       interchanges of rows)
 !                   1
 !              x  x 1x  x  x           x  x  x  x  x
 !                   1
 !              0  x 1x  x  x           0  x  x  x  x
 !  block i          1                       ---
 !  nrowi=4     0  0 1x  x  x           0  0 1x  x  x  0  01
 !  ncoli=5          1                       1             1
 !  last=2      0  0 1x  x  x           0  0 1x  x  x  0  01
 !              -------------------          1             1   new
 !                   1x  x  x  x  x          1x  x  x  x  x1  block
 !                   1                       1             1   i+1
 !  block i+1        1x  x  x  x  x          1x  x  x  x  x1
 !  nrowi1= 5        1                       1             1
 !  ncoli1= 5        1x  x  x  x  x          1x  x  x  x  x1
 !              -------------------          1-------------1
 !                   1
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
   implicit none
 
   integer ncoli
   integer ncoli1
   integer nrowi1
   integer nrowi
 
   real ( kind = 8 ) ai(nrowi,ncoli)
   real ( kind = 8 ) ai1(nrowi1,ncoli1)
   integer ip
   integer ipivot(nrowi)
   integer j
   integer last
   integer m
 
   if ( nrowi-last < 1 ) then
     return
   end if
 
   if ( ncoli-last < 1 ) then
     return
   end if
 !
 !  Put the remainder of block I into AI1.
 !
   do m = 1, nrowi-last
     ip = ipivot(last+m)
     do j = 1, ncoli-last
       ai1(m,j) = ai(ip,last+j)
     end do
   end do
 !
 !  Zero out the upper right corner of ai1.
 !
   do j = ncoli+1-last, ncoli1
     do m = 1, nrowi-last
       ai1(m,j) = 0.0D+00
     end do
   end do
 
   return
 end
diff --git a/pppack/slvblk.f90 b/pppack/slvblk.f90
index 0ce546c..4ea74a5 100644
--- a/pppack/slvblk.f90
+++ b/pppack/slvblk.f90
@@ -1,180 +1,180 @@
 !>
 !> @file slvblk.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine slvblk ( bloks, integs, nbloks, b, ipivot, x, iflag )
 
 !*************************************************************************
 !
 !! SLVBLK solves the almost block diagonal linear system A*x=b.
 !
 !  Discussion:
 !
 !    Such almost block diagonal matrices arise naturally in piecewise
 !    polynomial interpolation or approximation and in finite element
 !    methods for two-point boundary value problems.  The PLU factorization
 !    method is implemented here to take advantage of the special structure
 !    of such systems for savings in computing time and storage requirements.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !  bloks  a one-dimenional array, of length
 !                   sum( integs(1,i)*integs(2,i) ; i=1,nbloks )
 !         on input, contains the blocks of the almost block diagonal
 !         matrix  a  .  the array integs (see below and the example)
 !         describes the block structure.
 !         on output, contains correspondingly the plu factorization
 !         of  a  (if iflag /= 0).  certain of the entries into bloks
 !         are arbitrary (where the blocks overlap).
 !
 !  integs integer array description of the block structure of  a .
 !         integs(1,i)=no. of rows of block i       = nrow
 !         integs(2,i)=no. of colums of block i     = ncol
 !         integs(3,i)=no. of elim. steps in block i = last
 !                          i =1,2,...,nbloks
 !         the linear system is of order
 !         n = sum ( integs(3,i) , i=1,...,nbloks ),
 !         but the total number of rows in the blocks is
 !         nbrows=sum( integs(1,i) ; i = 1,...,nbloks)
 !
 !  nbloks number of blocks
 !  b       right side of the linear system, array of length nbrows.
 !          certain of the entries are arbitrary, corresponding to
 !          rows of the blocks which overlap (see block structure and
 !          the example below).
 !  ipivot  on output, integer array containing the pivoting sequence
 !          used. length is nbrows
 !  x       on output, contains the computed solution (if iflag /= 0)
 !          length is n.
 !  iflag   on output, integer
 !          =(-1)**(no. of interchanges during factorization)
 !                   if  a  is invertible
 !          =0    if  a  is singular
 !
 !                   auxiliary programs
 !  fcblok (bloks,integs,nbloks,ipivot,scrtch,iflag)  factors the matrix
 !           a , and is used for this purpose in slvblk. its arguments
 !          are as in slvblk, except for
 !              scrtch=a work array of length max(integs(1,i)).
 !
 !  sbblok (bloks,integs,nbloks,ipivot,b,x)  solves the system a*x=b
 !          once  a  is factored. this is done automatically by slvblk
 !          for one right side b, but subsequent solutions may be
 !          obtained for additional b-vectors. the arguments are all
 !          as in slvblk.
 !
 !  dtblok (bloks,integs,nbloks,ipivot,iflag,detsgn,detlog) computes the
 !          determinant of  a  once slvblk or fcblok has done the fact-
 !          orization.the first five arguments are as in slvblk.
 !              detsgn =sign of the determinant
 !              detlog =natural log of the determinant
 !
 !              block structure of  a
 !  the nbloks blocks are stored consecutively in the array  bloks .
 !  the first block has its (1,1)-entry at bloks(1), and, if the i-th
 !  block has its (1,1)-entry at bloks(index(i)), then
 !         index(i+1)=index(i) + nrow(i)*ncol(i) .
 !    the blocks are pieced together to give the interesting part of  a
 !  as follows.  for i=1,2,...,nbloks-1, the (1,1)-entry of the next
 !  block (the (i+1)st block ) corresponds to the (last+1,last+1)-entry
 !  of the current i-th block.  recall last=integs(3,i) and note that
 !  this means that
 !      a. every block starts on the diagonal of  a .
 !      b. the blocks overlap (usually). the rows of the (i+1)st block
 !         which are overlapped by the i-th block may be arbitrarily de-
 !         fined initially. they are overwritten during elimination.
 !    the right side for the equations in the i-th block are stored cor-
 !  respondingly as the last entries of a piece of  b  of length  nrow
 !  (= integs(1,i)) and following immediately in  b  the corresponding
 !  piece for the right side of the preceding block, with the right side
 !  for the first block starting at  b(1) . in this, the right side for
 !  an equation need only be specified once on input, in the first block
 !  in which the equation appears.
 !
 !              example and test driver
 !    the test driver for this package contains an example, a linear
 !  system of order 11, whose nonzero entries are indicated in the fol-
 !  lowing schema by their row and column index modulo 10. next to it
 !  are the contents of the  integs  arrray when the matrix is taken to
 !  be almost block diagonal with  nbloks=5, and below it are the five
 !  blocks.
 !
 !                      nrow1=3, ncol1 = 4
 !           11 12 13 14
 !           21 22 23 24   nrow2=3, ncol2 = 3
 !           31 32 33 34
 !  last1=2      43 44 45
 !                 53 54 55            nrow3=3, ncol3 = 4
 !        last2=3         66 67 68 69   nrow4 = 3, ncol4 = 4
 !                          76 77 78 79      nrow5=4, ncol5 = 4
 !                          86 87 88 89
 !                 last3=1   97 98 99 90
 !                    last4=1   08 09 00 01
 !                                18 19 10 11
 !                       last5=4
 !
 !         actual input to bloks shown by rows of blocks of  a .
 !      (the ** items are arbitrary, this storage is used by slvblk)
 !
 !  11 12 13 14  / ** ** **  / 66 67 68 69  / ** ** ** **  / ** ** ** **
 !  21 22 23 24 /  43 44 45 /  76 77 78 79 /  ** ** ** ** /  ** ** ** **
 !  31 32 33 34/   53 54 55/   86 87 88 89/   97 98 99 90/   08 09 00 01
 !                                                           18 19 10 11
 !
 !  index=1      index = 13  index = 22     index = 34     index = 46
 !
 !         actual right side values with ** for arbitrary values
 !  b1 b2 b3 ** b4 b5 b6 b7 b8 ** ** b9 ** ** b10 b11
 !
 !  (it would have been more efficient to combine block 3 with block 4)
 !
   implicit none
 
   integer nbloks
 
   real ( kind = 8 ) b(*)
   real ( kind = 8 ) bloks(*)
   integer iflag
   integer integs(3,nbloks)
   integer ipivot(*)
   real ( kind = 8 ) x(*)
 !
 !  In the call to FCBLOK, X is used for temporary storage.
 !
   call fcblok ( bloks, integs, nbloks, ipivot, x, iflag )
 
   if ( iflag == 0 ) then
     return
   end if
 
   call sbblok ( bloks, integs, nbloks, ipivot, b, x )
 
   return
 end
diff --git a/pppack/smooth.f90 b/pppack/smooth.f90
index cab3d0b..d014648 100644
--- a/pppack/smooth.f90
+++ b/pppack/smooth.f90
@@ -1,218 +1,218 @@
 !>
 !> @file smooth.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine smooth ( x, y, dy, npoint, s, v, a, sfp )
 
 !*************************************************************************
 !
 !! SMOOTH constructs the cubic smoothing spline to given data.
 !
 !  Discussion:
 !
 !    The data is of the form
 !
 !      (x(i),y(i)), i=1,...,npoint,
 !
 !    The cubic smoothing spline has as small a second derivative as
 !    possible while
 !
 !      s(f)=sum( ((y(i)-f(x(i)))/dy(i))**2 , i=1,...,npoint ) <= s .
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !  input
 !
 !    Input, real ( kind = 8 ) X(NPOINT), the abscissas, assumed to be strictly
 !    increasing .
 !
 !    Input, real ( kind = 8 ) Y(NPOINT), the corresponding ordinates.
 !
 !  dy(1),...,dy(npoint)     estimate of uncertainty in data, assumed
 !  to be positive .
 !
 !  npoint.....number of data points,  assumed greater than 1
 !
 !  s.....upper bound on the discrete weighted mean square distance of
 !        the approximation  f  from the data .
 !
 !  work arrays:
 !
 !  v      of size (npoint,7)
 !  a      of size (npoint,4)
 !
 !  output
 !
 !  a(.,1).....contains the sequence of smoothed ordinates .
 !  a(i,j)=d**(j-1)f(x(i)), j=2,3,4, i=1,...,npoint-1 ,  i.e., the
 !        first three derivatives of the smoothing spline  f  at the
 !        left end of each of the data intervals .
 !     Warning . . .   a  would have to be transposed before it
 !        could be used in  ppvalu .
 !
 !  Method:
 !
 !    the matrices  q-transp*d  and  q-transp*d**2*q  are constructed in
 !    SETUPQ from  x  and  dy , as is the vector  qty=q-transp*y .
 !    then, for given  p , the vector U is determined in CHOL1D as
 !    the solution of the linear system
 !               (6(1-p)q-transp*d**2*q+p*r)u =qty  .
 !  from  u , the smoothing spline  f  (for this choice of smoothing par-
 !  ameter  p ) is obtained in the sense that
 !                        f(x(.)) = y-6(1-p)d**2*q*u        and
 !                  (d**2)f(x(.)) = 6*p*u                      .
 !
 !  the smoothing parameter  p  is found (if possible) so that
 !                sf(p) = s ,
 !  with  sf(p)=s(f) , where  f  is the smoothing spline as it depends
 !  on  p .  if  s=0, then p = 1 . if  sf(0) <= s , then p = 0 .
 !  otherwise, the secant method is used to locate an appropriate  p  in
 !  the open interval  (0,1) . specifically,
 !                p(0)=0,  p(1) = (s-sf(0))/dsf
 !  with  dsf=-24*u-transp*r*u  a good approximation to  d(sf(0)) = dsf
 !  +60*(d*q*u)-transp*(d*q*u) , and  u  as obtained for  p=0 .
 !  after that, for n=1,2,...  until sf(p(n)) <= 1.01*s, do....
 !  determine  p(n+1)  as the point at which the secant to  sf  at the
 !  points  p(n)  and  p(n-1)  takes on the value  s .
 !  if  p(n+1) >= 1 , choose instead  p(n+1)  as the point at which
 !  the parabola  sf(p(n))*((1-.)/(1-p(n)))**2  takes on the value  s.
 !
 !  Note that, in exact arithmetic, always  p(n+1) < p(n) , hence
 !  sf(p(n+1)) < sf(p(n)) . therefore, also stop the iteration,
 !  with final  p=1 , in case  sf(p(n+1)) >= sf(p(n)) .
 !
   implicit none
 
   integer npoint
 
   real ( kind = 8 ) a(npoint,4)
   real ( kind = 8 ) change
   real ( kind = 8 ) dy(npoint)
   integer i
   real ( kind = 8 ) p
   real ( kind = 8 ) prevp
   real ( kind = 8 ) prevsf
   real ( kind = 8 ) s
   real ( kind = 8 ) sfp
   real ( kind = 8 ) utru
   real ( kind = 8 ) v(npoint,7)
   real ( kind = 8 ) x(npoint)
   real ( kind = 8 ) y(npoint)
 
   call setupq ( x, dy, y, npoint, v, a(1,4) )
 
   if ( 0.0D+00 < s ) then
     go to 20
   end if
 
 10    continue
 
   p = 1.0D+00
   call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) )
   sfp = 0.0D+00
   go to 70
 
 20    continue
 
   p = 0.0D+00
   call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) )
 
   sfp = 0.0D+00
   do i = 1, npoint
     sfp = sfp + ( a(i,1) * dy(i) )**2
   end do
   sfp = sfp * 36.0D+00
 
   if ( sfp <= s ) then
     go to 70
   end if
 
   prevp = 0.0D+00
   prevsf = sfp
 
   utru = 0.0D+00
   do i = 2, npoint
     utru = utru + v(i-1,4) * ( a(i-1,3) * ( a(i-1,3) + a(i,3) ) + a(i,3)**2 )
   end do
 
   p = ( sfp - s ) / ( 24.0D+00 * utru )
 !
 !  Secant iteration for the determination of p starts here.
 !
    50 continue
 
   call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) )
 
   sfp = 0.0D+00
   do i = 1, npoint
     sfp = sfp + ( a(i,1) * dy(i) )**2
   end do
   sfp = sfp * 36.0D+00 * ( 1.0D+00 - p )**2
 
   if ( sfp <= 1.01D+00 * s ) then
     go to 70
   end if
 
   if ( prevsf <= sfp ) then
     go to 10
   end if
 
   change = ( p - prevp ) / ( sfp - prevsf ) * ( sfp - s )
   prevp = p
   p = p - change
   prevsf = sfp
 
   if ( 1.0D+00 <= p ) then
     p = 1.0D+00 - sqrt ( s / prevsf ) * ( 1.0D+00 - prevp )
   end if
 
   go to 50
 !
 !  The correct value of p has been found.
 !  Compute polynomial coefficients from  q*u (in a(.,1)).
 !
    70 continue
 
   do i = 1, npoint
     a(i,1) = y(i) - 6.0D+00 * ( 1.0D+00 - p ) * dy(i)**2 * a(i,1)
   end do
 
   do i = 1, npoint
     a(i,3) = 6.0D+00 * a(i,3) * p
   end do
 
   do i = 1, npoint-1
     a(i,4) = ( a(i+1,3) - a(i,3) ) / v(i,4)
     a(i,2) = ( a(i+1,1) - a(i,1) ) / v(i,4) &
       - ( a(i,3) + a(i,4) / 3.0D+00 * v(i,4) ) / 2.0D+00 * v(i,4)
   end do
 
   return
 end
diff --git a/pppack/spli2d.f90 b/pppack/spli2d.f90
index 348d253..b71a3f4 100644
--- a/pppack/spli2d.f90
+++ b/pppack/spli2d.f90
@@ -1,241 +1,241 @@
 !>
 !> @file spli2d.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine spli2d ( tau, gtau, t, n, k, m, work, q, bcoef, iflag )
 
 !*************************************************************************
 !
 !! SPLI2D produces a interpolatory tensor product spline.
 !
 !  Discussion:
 !
 !    SPLI2D is an extended version of SPLINT.
 !
 !    SPLI2D produces the B-spline coefficients BCOEF(J,.) of the
 !    spline of order K with knots T(I), I=1,..., N+K, which takes on
 !    the value GTAU(I,J) at TAU(I), I=1,..., N, J=1,...,M.
 !
 !    The I-th equation of the linear system
 !
 !      A * BCOEF = B
 !
 !    for the B-spline coefficients of the interpolant enforces
 !    interpolation at TAU(I), I=1,...,N.  Hence,  B(I)=GTAU(I),
 !    all I, and A is a band matrix with 2K-1 bands, if it is
 !    invertible.
 !
 !    The matrix A is generated row by row and stored, diagonal by
 !    diagonal, in the rows of the array Q, with the main diagonal
 !    going into row K.
 !
 !    The banded system is then solved by a call to BANFAC, which
 !    constructs the triangular factorization for A and stores it
 !    again in Q, followed by a call to BANSLV, which then obtains
 !    the solution BCOEF by substitution.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(N), contains the data point abscissas.
 !    TAU must be strictly increasing
 !
 !    Input, real ( kind = 8 ) GTAU(N), contains the data point ordinates,
 !    J=1,...,M.
 !
 !    Input, real ( kind = 8 ) T(N+K), the knot sequence.
 !
 !    Input, integer N,  the number of data points and the
 !    dimension of the spline space SPLINE(K,T)
 !
 !    Input, integer K, the order of the spline.
 !
 !    Input, integer M, the number of data sets.
 !
 !    Work space, real ( kind = 8 ) WORK(N).
 !
 !    Output, real ( kind = 8 ) Q(2*K-1)*N, containing the triangular
 !    factorization of the coefficient matrix of the linear
 !    system for the B-spline coefficients of the spline interpolant.
 !
 !    The B-spline coefficients for the interpolant of an additional
 !    data set (TAU(I),HTAU(I)), I=1,...,N  with the same data
 !    abscissae can be obtained without going through all the
 !    calculations in this routine, simply by loading HTAU into
 !    BCOEF and then using the statement
 !
 !      CALL BANSLV(Q,2*K-1,N,K-1,K-1,BCOEF)
 !
 !    Output, real ( kind = 8 ) BCOEF(N), the B-spline coefficients of
 !    the interpolant.
 !
 !    Output, integer IFLAG, error indicator.
 !    1, no error.
 !    2, an error occurred, which may have been caused by
 !       singularity of the linear system.
 !
 !       The linear system to be solved is theoretically invertible if
 !       and only if
 !
 !         T(I) < TAU(I) < TAU(I+K), for all I.
 !
 !       Violation of this condition is certain to lead to IFLAG=2.
 !
   implicit none
 
   integer m
   integer n
 
   real ( kind = 8 ) bcoef(m,n)
   real ( kind = 8 ) gtau(n,m)
   integer i
   integer iflag
   integer ilp1mx
   integer j
   integer jj
   integer k
   integer left
   real ( kind = 8 ) q((2*k-1)*n)
   real ( kind = 8 ) t(n+k)
   real ( kind = 8 ) tau(n)
   real ( kind = 8 ) taui
   real ( kind = 8 ) work(n)
 
   left = k
 
   do i = 1, (2*k-1)*n
     q(i) = 0.0
   end do
 !
 !  Construct the N interpolation equations.
 !
   do i = 1, n
 
     taui = tau(i)
     ilp1mx = min(i+k,n+1)
 !
 !  Find the index LEFT in the closed interval (I,I+K-1) such
 !  that:
 !
 !    T(LEFT) < = TAU(I) < T(LEFT+1)
 !
 !  The matrix will be singular if this is not possible.
 !
     left = max(left,i)
 
     if ( taui < t(left) ) then
       iflag = 2
       write(*,*)' '
       write(*,*)'SPLI2D - Fatal error!'
       write(*,*)'  The TAU array is not strictly increasing.'
       stop
     end if
 
    20   continue
 
     if ( t(left+1) <= taui ) then
 
       left = left+1
       if ( left < ilp1mx ) then
         go to 20
       end if
 
       left = left-1
 
       if ( t(left+1) < taui ) then
         iflag = 2
         write(*,*)' '
         write(*,*)'SPLI2D - Fatal error!'
         write(*,*)'  The TAU array is not strictly increasing.'
         stop
       end if
 
     end if
 !
 !  The I-th equation enforces interpolation at TAUI, hence
 !
 !    A(I,J)=B(J,K,T)(TAUI), for all J.
 !
 !  Only the K entries with J=LEFT-K+1, ..., LEFT actually might be
 !  nonzero.  These K numbers are returned, in WORK (used for
 !  temporary storage here), by the following call:
 !
     call bsplvb(t,k,1,taui,left,work)
 !
 !  We therefore want
 !
 !    WORK(J)=B(LEFT-K+J)(TAUI)
 !
 !  to go into
 !
 !        a(i,left-k+j), i.e., into  q(i-(left+j)+2*k,(left+j)-k) since
 !        a(i+j,j)  is to go into  q(i+k,j), all i,j,  if we consider  q
 !        as a two-dim. array , with  2*k-1  rows (see comments in
 !        banfac). in the present program, we treat  q  as an equivalent
 !        one-dimensional array (because of fortran restrictions on
 !  ??  LOST LINE ??
 !        entry
 !            i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1)
 !                 = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j
 !  of  q .
 !
     jj = i-left+1+(left-k)*(k+k-1)
 
     do j = 1, k
       jj = jj+k+k-2
       q(jj) = work(j)
     end do
 
   end do
 !
 !  Factor A, stored again in Q.
 !
   call banfac(q,k+k-1,n,k-1,k-1,iflag)
 
   if ( iflag == 2 ) then
     write(*,*)' '
     write(*,*)'SPLI2D - Fatal error!'
     write(*,*)'  BANFAC reports that the matrix is singular.'
     stop
   end if
 !
 !  Solve A*BCOEF=GTAU by backsubstitution.
 !
   do j = 1, m
 
     work(1:n) = gtau(1:n,j)
 
     call banslv ( q, k+k-1, n, k-1, k-1, work )
 
     bcoef(j,1:n) = work(1:n)
 
   end do
 
   return
 end
diff --git a/pppack/spline_hermite_set.f90 b/pppack/spline_hermite_set.f90
index 3b7e28e..cca087e 100644
--- a/pppack/spline_hermite_set.f90
+++ b/pppack/spline_hermite_set.f90
@@ -1,90 +1,90 @@
 !>
 !> @file spline_hermite_set.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine spline_hermite_set ( ndata, tdata, c )
 
 !*************************************************************************
 !
 !! SPLINE_HERMITE_SET sets up a piecewise cubic Hermite interpolant.
 !
 !  Modified:
 !
 !    06 April 1999
 !
 !  Reference:
 !
 !    Conte and de Boor,
 !    Algorithm CALCCF,
 !    Elementary Numerical Analysis,
 !    1973, page 235.
 !
 !  Parameters:
 !
 !    Input, integer NDATA, the number of data points.
 !    NDATA must be at least 2.
 !
 !    Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points.
 !    The entries of TDATA are assumed to be strictly increasing.
 !
 !    Input/output, real ( kind = 8 ) C(4,NDATA).
 !    On input, C(1,I) and C(2,I) should contain the value of the
 !    function and its derivative at TDATA(I), for I = 1 to NDATA.
 !    These values will not be changed by this routine.
 !    On output, C(3,I) and C(4,I) contain the quadratic
 !    and cubic coefficients of the Hermite polynomial
 !    in the interval (TDATA(I), TDATA(I+1)), for I=1 to NDATA-1.
 !    C(3,NDATA) and C(4,NDATA) are set to 0.
 !    In the interval (TDATA(I), TDATA(I+1)), the interpolating Hermite
 !    polynomial is given by
 !
 !    SVAL(TVAL) =                C(1,I)
 !       + ( TVAL - TDATA(I) ) * ( C(2,I)
 !       + ( TVAL - TDATA(I) ) * ( C(3,I)
 !       + ( TVAL - TDATA(I) ) *   C(4,I) ) )
 !
   implicit none
 
   integer ndata
 
   real ( kind = 8 ) c(4,ndata)
   real ( kind = 8 ) divdif1
   real ( kind = 8 ) divdif3
   real ( kind = 8 ) dt
   integer i
   real ( kind = 8 ) tdata(ndata)
 
   do i = 1, ndata-1
     dt = tdata(i+1) - tdata(i)
     divdif1 = ( c(1,i+1) - c(1,i) ) / dt
     divdif3 = c(2,i) + c(2,i+1) - 2.0D+00 * divdif1
     c(3,i) = ( divdif1 - c(2,i) - divdif3 ) / dt
     c(4,i) = divdif3 / ( dt * dt )
   end do
 
   c(3,ndata) = 0.0D+00
   c(4,ndata) = 0.0D+00
 
   return
 end
diff --git a/pppack/spline_hermite_val.f90 b/pppack/spline_hermite_val.f90
index 12bdcc9..76bb98c 100644
--- a/pppack/spline_hermite_val.f90
+++ b/pppack/spline_hermite_val.f90
@@ -1,97 +1,97 @@
 !>
 !> @file spline_hermite_val.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine spline_hermite_val ( ndata, tdata, c, tval, sval )
 
 !*************************************************************************
 !
 !! SPLINE_HERMITE_VAL evaluates a piecewise cubic Hermite interpolant.
 !
 !  Discussion:
 !
 !    SPLINE_HERMITE_SET must be called first, to set up the
 !    spline data from the raw function and derivative data.
 !
 !  Modified:
 !
 !    06 April 1999
 !
 !  Reference:
 !
 !    Conte and de Boor,
 !    Algorithm PCUBIC,
 !    Elementary Numerical Analysis,
 !    1973, page 234.
 !
 !  Parameters:
 !
 !    Input, integer NDATA, the number of data points.
 !    NDATA is assumed to be at least 2.
 !
 !    Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points.
 !    The entries of TDATA are assumed to be strictly increasing.
 !
 !    Input, real ( kind = 8 ) C(4,NDATA), contains the data computed by
 !    SPLINE_HERMITE_SET.
 !
 !    Input, real ( kind = 8 ) TVAL, the point where the interpolant is to
 !    be evaluated.
 !
 !    Output, real ( kind = 8 ) SVAL, the value of the interpolant at TVAL.
 !
   implicit none
 
   integer ndata
 
   real ( kind = 8 ) c(4,ndata)
   real ( kind = 8 ) dt
   integer i
   integer j
   real ( kind = 8 ) sval
   real ( kind = 8 ) tdata(ndata)
   real ( kind = 8 ) tval
 !
 !  Find the interval J = [ TDATA(J), TDATA(J+1) ] that contains
 !  or is nearest to TVAL.
 !
   j = ndata - 1
 
   do i = 1, ndata-2
 
     if ( tval < tdata(i+1) ) then
       j = i
       exit
     end if
 
   end do
 !
 !  Evaluate the cubic polynomial.
 !
   dt = tval - tdata(j)
 
   sval = c(1,j) + dt * ( c(2,j) + dt * ( c(3,j) + dt * c(4,j) ) )
 
   return
 end
diff --git a/pppack/splint.f90 b/pppack/splint.f90
index 10ef1f8..5a23103 100644
--- a/pppack/splint.f90
+++ b/pppack/splint.f90
@@ -1,208 +1,208 @@
 !>
 !> @file splint.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine splint ( tau, gtau, t, n, k, q, bcoef, iflag )
 
 !*************************************************************************
 !
 !! SPLINT produces the B-spline coefficients BCOEF of an interpolating spline.
 !
 !  Discussion:
 !
 !    The spline is of order K with knots T(1:N+K), and takes on the
 !    value GTAU(I) at TAU(I), for I = 1 to N.
 !
 !    The I-th equation of the linear system
 !      A * BCOEF = B
 !    for the b-coefficients of the interpolant enforces interpolation
 !    at TAU(1:N).
 !
 !    Hence,  b(i)=gtau(i), all i, and  a  is a band matrix with  2k-1
 !    bands (if it is invertible).
 !
 !    The matrix A is generated row by row and stored, diagonal by di-
 !    agonal, in the rows of the array  q , with the main diagonal go-
 !    ing into row K.  see comments in the program below.
 !
 !    The banded system is then solved by a call to  banfac (which con-
 !    structs the triangular factorization for  a  and stores it again in
 !    q ), followed by a call to  banslv (which then obtains the solution
 !    bcoef  by substitution).
 !
 !    BANFAC does no pivoting, since the total positivity of the matrix
 !    A makes this unnecessary.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(N), the data point abscissas.  The entries in
 !    TAU should be strictly increasing.
 !
 !    Input, real ( kind = 8 ) GTAU(N), the data ordinates.
 !
 !    Input, real ( kind = 8 ) T(N+K), the knot sequence.
 !
 !    Input, integer N, the number of data points.
 !
 !    Input, integer K, the order of the spline.
 !
 !  output
 !
 !  q, array of size  (2*k-1)*n , containing the triangular factoriz-
 !        ation of the coefficient matrix of the linear system for the b-
 !        coefficients of the spline interpolant.
 !        the b-coefficients for the interpolant of an additional data set can
 !        be obtained without going through all the calculations in this
 !        routine, simply by loading  htau  into  bcoef  and then execut-
 !        ing the    call banslv ( q, 2*k-1, n, k-1, k-1, bcoef )
 !
 !  bcoef, the b-coefficients of the interpolant, of length  n.
 !
 !  iflag, an integer indicating success (= 1)  or failure (= 2)
 !        the linear system to be solved is (theoretically) invertible if
 !        and only if
 !              t(i) < tau(i) < tau(i+k),    all i.
 !        violation of this condition is certain to lead to  iflag=2 .
 !
   implicit none
 
   integer n
 
   real ( kind = 8 ) bcoef(n)
   real ( kind = 8 ) gtau(n)
   integer i
   integer iflag
   integer ilp1mx
   integer j
   integer jj
   integer k
   integer kpkm2
   integer left
   real ( kind = 8 ) q((2*k-1)*n)
   real ( kind = 8 ) t(n+k)
   real ( kind = 8 ) tau(n)
   real ( kind = 8 ) taui
 
   kpkm2 = 2*(k-1)
   left = k
 
   do i = 1, (2*k-1)*n
     q(i) = 0.0D+00
   end do
 !
 !  loop over i to construct the  n  interpolation equations
 !
   do i = 1, n
 
     taui = tau(i)
     ilp1mx = min(i+k,n+1)
 !
 !  find  left  in the closed interval (i,i+k-1) such that
 !    t(left) <= tau(i)  < t(left+1)
 !  matrix is singular if this is not possible
 !
     left = max(left,i)
 
     if ( taui < t(left)) then
       go to 70
     end if
 
    20   continue
 
     if ( taui < t(left+1)) then
       go to 30
     end if
 
     left = left+1
     if ( left < ilp1mx) then
       go to 20
     end if
 
     left = left-1
     if ( t(left+1) < taui ) then
       go to 70
     end if
 !
 !  The i-th equation enforces interpolation at taui, hence
 !  a(i,j)=b(j,k,t)(taui), all j. only the  k  entries with  j =
 !  left-k+1,...,left actually might be nonzero. these  k  numbers
 !  are returned, in  bcoef (used for temp.storage here), by the
 !  following
 !
    30   continue
 
     call bsplvb(t,k,1,taui,left,bcoef)
 !
 !  We therefore want  bcoef(j)=b(left-k+j)(taui) to go into
 !  a(i,left-k+j), i.e., into  q(i-(left+j)+2*k,(left+j)-k) since
 !  a(i+j,j)  is to go into  q(i+k,j), all i,j,  if we consider  q
 !  as a two-dim. array , with  2*k-1  rows (see comments in
 !  banfac). in the present program, we treat  q  as an equivalent
 !  one-dimensional array (because of fortran restrictions on
 !  dimension statements) . we therefore want  bcoef(j) to go into
 !  entry
 !    i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1)
 !   = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j
 !  of  q .
 !
     jj = i-left+1+(left-k)*(k+k-1)
     do j = 1,k
       jj = jj+kpkm2
       q(jj) = bcoef(j)
     end do
 
   end do
 !
 !  Obtain factorization of A, stored again in Q.
 !
   call banfac ( q, k+k-1, n, k-1, k-1, iflag )
 
   if ( iflag == 2 ) then
     write(*,*)' '
     write(*,*)'SPLINT - Fatal Error!'
     write(*,*)'  The linear system is not invertible!'
     return
   end if
 !
 !  Solve  a*bcoef=gtau  by backsubstitution
 !
   bcoef(1:n) = gtau(1:n)
 
   call banslv ( q, k+k-1, n, k-1, k-1, bcoef )
   return
 
    70 iflag=2
 
   write ( *, * ) ' '
   write ( *, * ) 'SPLINT - Fatal Error!'
   write ( *, * ) '  The linear system is not invertible!'
 
   return
 end
diff --git a/pppack/splopt.f90 b/pppack/splopt.f90
index e27065d..0171ab2 100644
--- a/pppack/splopt.f90
+++ b/pppack/splopt.f90
@@ -1,371 +1,371 @@
 !>
 !> @file splopt.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine splopt ( tau, n, k, scrtch, t, iflag )
 
 !*************************************************************************
 !
 !! SPLOPT computes the knots for an optimal recovery scheme.
 !
 !  Discussion:
 !
 !    The optimal recovery scheme is of order K for data at TAU(1:N).
 !
 !    The interior knots T(K+1:N) are determined by Newton's method in
 !    such a way that the signum function which changes sign at
 !      T(K+1), ..., T(N)  and nowhere else in ( TAU(1), TAU(n) ) is
 !    orthogonal to the spline space SPLINE ( K, TAU ) on that interval.
 !
 !    Let XI(J) be the current guess for T(K+J), j=1,...,n-k.  Then
 !    the next Newton iterate is of the form
 !      xi(j) + (-)**(n-k-j)*x(j)  ,  j=1,...,n-k,
 !    with X the solution of the linear system
 !      C * X = D.
 !
 !    Here,  c(i,j)=b(i)(xi(j)), all j, with  b(i)  the i-th b-spline of
 !    order K for the knot sequence TAU, all i, and D is the vector
 !    given by  d(i)=sum( -a(j) , j=i,...,n )*(TAU(i+k)-TAU(i))/k, all i,
 !    with  a(i)=sum ( (-)**(n-k-j)*b(i,k+1,tau)(xi(j)) , j=1,...,n-k )
 !    for i=1,...,n-1, and  a(n)=-.5 .
 !
 !    See chapter XIII of text and references there for a derivation.
 !
 !    The first guess for  t(k+j)  is  (TAU(j+1)+...+TAU(j+k-1))/(k-1) .
 !    iteration terminates if  max(abs(x(j))) < t o l  , with
 !      TOL = t o l r t e *(TAU(n)-TAU(1))/(n-k) ,
 !    or else after NEWTMX iterations , currently,
 !      newtmx, tolrte / 10, .000001
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(N), the interpolation points.
 !    assumed to be nondecreasing, with tau(i) < tau(i+k),all i.
 !
 !    Input, integer N, the number of data points.
 !
 !    Input, integer K, the order of the optimal recovery scheme to be used.
 !
 !    Workspace, real ( kind = 8 ) SCRTCH((N-K)*(2*K+3)+5*K+3).  The various
 !    contents are specified in the text below .
 !
 !    Output, real ( kind = 8 ) T(N+K), the optimal knots ready for
 !    use in optimal recovery. specifically,  t(1)=... = t(k) =
 !    tau(1)  and  t(n+1)=... = t(n+k) = tau(n) , while the  n-k
 !    interior knots  t(k+1), ..., t(n)  are calculated.
 !
 !    Output, integer IFLAG, error indicator.
 !    = 1, success.  T contains the optimal knots.
 !    = 2, failure.  K < 3 or N < K or the linear system was singular.
 !
   implicit none
 
   integer k
   integer n
 
   real ( kind = 8 ) del
   real ( kind = 8 ) delmax
   real ( kind = 8 ) floatk
   integer i
   integer id
   integer iflag
   integer index
   integer j
   integer kp1
   integer kpkm1
   integer kpn
   integer l
   integer left
   integer leftmk
   integer lenw
   integer ll
   integer llmax
   integer llmin
   integer na
   integer nb
   integer nc
   integer nd
   integer, parameter :: newtmx = 10
   integer newton
   integer nmk
   integer nx
   real ( kind = 8 ) scrtch((n-k)*(2*k+3)+5*k+3)
   real ( kind = 8 ) t(n+k)
   real ( kind = 8 ) tau(n)
   real ( kind = 8 ) sign
   real ( kind = 8 ) signst
   real ( kind = 8 ) sum1
   real ( kind = 8 ) tol
   real ( kind = 8 ), parameter :: tolrte = 0.000001D+00
   real ( kind = 8 ) xij
 
   nmk = n - k
 
   if ( n < k ) then
     write ( *, * ) ' '
     write ( *, * ) 'SPLOPT - Fatal error!'
     write ( *, * ) '  N < K, N = ',n,' K = ',k
     iflag = 2
     return
   end if
 
   if ( n == k ) then
     do i = 1, k
       t(i) = tau(1)
       t(n+i) = tau(n)
     end do
     return
   end if
 
   if ( k <= 2 ) then
     write(*,*)' '
     write(*,*)'SPLOPT - Fatal error!'
     write(*,*)'  K < 2, K = ',k
     iflag = 2
     stop
   end if
 
   floatk = k
   kp1 = k+1
   kpkm1 = k+k-1
   kpn = k+n
 
   signst = -1.0D+00
   if ( (nmk/2) * 2 < nmk ) then
     signst = 1.0D+00
   end if
 !
 !  scrtch(i)=tau-extended(i), i=1,...,n+k+k
 !
   nx = n + k + k + 1
 !
 !  scrtch(i+nx)=xi(i),i=0,...,n-k+1
 !
   na = nx + nmk + 1
 !
 !  scrtch(i+na)=-a(i), i=1,...,n
 !
   nd = na + n
 !
 !  scrtch(i+nd)=x(i) or d(i), i=1,...,n-k
 !
   nb = nd+nmk
 !
 !  scrtch(i+nb)=biatx(i),i=1,...,k+1
 !
   nc = nb+kp1
 !
 !  scrtch(i+(j-1)*(2k-1)+nc)=w(i,j) = c(i-k+j,j), i=j-k,...,j+k,
 !                                                     j=1,...,n-k.
 !
   lenw = kpkm1*nmk
 !
 !  Extend TAU to a knot sequence and store in scrtch.
 !
   do j = 1, k
     scrtch(j) = tau(1)
     scrtch(kpn+j) = tau(n)
   end do
 
   do j = 1, n
     scrtch(k+j) = tau(j)
   end do
 !
 !  First guess for  scrtch (.+nx) = xi .
 !
   scrtch(nx) = tau(1)
   scrtch(nmk+1+nx) = tau(n)
 
   do j = 1, nmk
 
     sum1 = 0.0D+00
     do l = 1, k-1
       sum1 = sum1 + tau(j+l)
     end do
 
     scrtch(j+nx) = sum1 / real ( k - 1, kind = 8 )
 
   end do
 !
 !  last entry of  scrtch (.+na) =-a  is always ...
 !
   scrtch(n+na) = 0.5D+00
 !
 !  Start the Newton iteration.
 !
   newton = 1
   tol = tolrte * ( tau(n) - tau(1) ) / real ( nmk, kind = 8 )
 !
 !  Start the Newton step.
 !  compute the 2k-1 bands of the matrix c and store in scrtch(.+nc),
 !  and compute the vector  scrtch(.+na)=-a.
 !
   100 continue
 
   do i = 1, lenw
     scrtch(i+nc) = 0.0D+00
   end do
 
   do i = 2, n
     scrtch(i-1+na) = 0.0D+00
   end do
 
   sign = signst
   left = kp1
 
   do j = 1, nmk
 
     xij = scrtch(j+nx)
 
   130   continue
 
     if ( xij < scrtch(left+1) ) then
       go to 140
     end if
 
     left = left+1
     if ( left < kpn ) then
       go to 130
     end if
     left = left-1
 
   140   continue
 
     call bsplvb(scrtch,k,1,xij,left,scrtch(1+nb))
 !
 !  The TAU sequence in scrtch is preceded by  k  additional knots
 !  therefore,  scrtch(ll+nb)  now contains  b(left-2k+ll)(xij)
 !  which is destined for  c(left-2k+ll,j), and therefore for
 !    w(left-k-j+ll,j)= scrtch(left-k-j+ll+(j-1)*kpkm1 + nc)
 !  since we store the 2k-1 bands of  c  in the 2k-1  r o w s  of
 !  the work array w, and  w  in turn is stored in  s c r t c h ,
 !  with  w(1,1)=scrtch(1+nc).
 !
 !  also, c  being of order  n-k, we would want
 !    1 <= left-2k+ll .le. n-k  or
 !    llmin=2k-left  <=  ll  .le.  n-left+k = llmax .
 !
     leftmk = left-k
     index = leftmk-j+(j-1)*kpkm1+nc
     llmin = max(1,k-leftmk)
     llmax = min(k,n-leftmk)
     do ll = llmin, llmax
       scrtch(ll+index)=scrtch(ll+nb)
     end do
 
     call bsplvb (scrtch,kp1,2,xij,left,scrtch(1+nb))
     id=max(0,leftmk-kp1)
     llmin=1-min(0,leftmk-kp1)
     do ll=llmin, kp1
       id=id+1
       scrtch(id+na)=scrtch(id+na)-sign*scrtch(ll+nb)
     end do
 
     sign=-sign
 
   end do
 
   call banfac(scrtch(1+nc),kpkm1,nmk,k-1,k-1,iflag)
 
   if ( iflag == 2 ) then
     write ( *, * ) ' '
     write ( *, * ) 'SPLOPT - Fatal error!'
     write ( *, * ) '  Matrix C is not invertible.'
     stop
   end if
 !
 !  compute  scrtch (.+nd)= d  from  scrtch (.+na) =-a .
 !
   do i=n,2,-1
     scrtch(i-1+na)=scrtch(i-1+na)+scrtch(i+na)
   end do
 
   do i=1,nmk
     scrtch(i+nd)=scrtch(i+na)*(tau(i+k)-tau(i))/floatk
   end do
 !
 !  Compute  scrtch (.+nd)= x .
 !
   call banslv(scrtch(1+nc),kpkm1,nmk,k-1,k-1,scrtch(1+nd))
 !
 !  Compute  scrtch (.+nd)=change in  xi . modify, if necessary, to
 !  prevent new  xi  from moving more than 1/3 of the way to its
 !  neighbors. then add to  xi  to obtain new  xi  in scrtch(.+nx).
 !
   delmax = 0.0D+00
   sign = signst
   do i = 1, nmk
     del = sign * scrtch(i+nd)
     delmax = max ( delmax, abs ( del ) )
     if ( 0.0D+00 < del ) then
       go to 230
     end if
     del = max ( del, ( scrtch(i-1+nx) - scrtch(i+nx) ) / 3.0D+00 )
     go to 240
   230   del = min (del,(scrtch(i+1+nx)-scrtch(i+nx))/3.0D+00 )
   240   sign = -sign
     scrtch(i+nx) = scrtch(i+nx)+del
   end do
 !
 !  Call it a day in case change in  xi  was small enough or too many
 !  steps were taken.
 !
   if ( delmax < tol ) then
     go to 270
   end if
 
   newton = newton + 1
   if ( newton <= newtmx ) then
     go to 100
   end if
 
   write ( *, * ) ' '
   write ( *, * ) 'SPLOPT - Warning!'
   write ( *, * ) '  No convergence.  Number of Newton steps was ', newtmx
 
   270 continue
 
   do i = 1, nmk
     t(k+i) = scrtch(i+nx)
   end do
 
   290 continue
 
   do i=1,k
     t(i)=tau(1)
     t(n+i)=tau(n)
   end do
 
   return
 
 ! 310 iflag=2
 !
 !     return
 end
diff --git a/pppack/subbak.f90 b/pppack/subbak.f90
index c3df5e9..815b401 100644
--- a/pppack/subbak.f90
+++ b/pppack/subbak.f90
@@ -1,80 +1,80 @@
 !>
 !> @file subbak.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine subbak ( w, ipivot, nrow, ncol, last, x )
 
 !*************************************************************************
 !
 !! SUBBAK carries out backsubstitution for the current block.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    w, ipivot, nrow, ncol, last  are as on return from factrb.
 !
 !    x(1),...,x(ncol)  contains, on input, the right side for the
 !            equations in this block after backsubstitution has been
 !            carried up to but not including equation ipivot(last).
 !            means that x(j) contains the right side of equation ipi-
 !            vot(j) as modified during elimination, j=1,...,last, while
 !            for j > last, x(j) is already a component of the solut-
 !            ion vector.
 !
 !    x(1),...,x(ncol) contains, on output, the components of the solut-
 !            ion corresponding to the present block.
 !
   implicit none
 
   integer ncol
   integer nrow
 
   integer ip
   integer ipivot(nrow)
   integer j
   integer k
   integer last
   real ( kind = 8 ) s
   real ( kind = 8 ) w(nrow,ncol)
   real ( kind = 8 ) x(ncol)
 
   do k = last, 1, -1
 
     ip = ipivot(k)
 
     s = 0.0D+00
     do j = k+1, ncol
       s = s + w(ip,j) * x(j)
     end do
 
     x(k) = ( x(k) - s ) / w(ip,k)
 
   end do
 
 end
diff --git a/pppack/subfor.f90 b/pppack/subfor.f90
index 58f7afe..a1a2517 100644
--- a/pppack/subfor.f90
+++ b/pppack/subfor.f90
@@ -1,100 +1,100 @@
 !>
 !> @file subfor.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine subfor ( w, ipivot, nrow, last, b, x )
 
 !*************************************************************************
 !
 !! SUBFOR carries out the forward pass of substitution for the current block.
 !
 !  Discussion:
 !
 !    The forward pass is the action on the right side corresponding to the
 !    elimination carried out in FACTRB for this block.
 !
 !    At the end, x(j) contains the right side of the transformed
 !    ipivot(j)-th equation in this block, j=1,...,nrow. then, since
 !    for i=1,...,nrow-last, b(nrow+i) is going to be used as the right
 !    side of equation I in the next block (shifted over there from
 !    this block during factorization), it is set equal to x(last+i) here.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    w, ipivot, nrow, last  are as on return from factrb.
 !
 !    b(j)   is expected to contain, on input, the right side of j-th
 !           equation for this block, j=1,...,nrow.
 !    b(nrow+j)   contains, on output, the appropriately modified right
 !           side for equation j in next block, j=1,...,nrow-last.
 !
 !    x(j)   contains, on output, the appropriately modified right
 !           side of equation ipivot(j) in this block, j=1,...,last (and
 !           even for j=last+1,...,nrow).
 !
   implicit none
 
   integer last
   integer nrow
 
   real ( kind = 8 ) b(nrow+nrow-last)
   integer ip
   integer ipivot(nrow)
   integer j
   integer k
   real ( kind = 8 ) s
   real ( kind = 8 ) w(nrow,last)
   real ( kind = 8 ) x(nrow)
 
   ip = ipivot(1)
   x(1) = b(ip)
 
   do k = 2, nrow
 
     ip = ipivot(k)
 
     s = 0.0D+00
     do j = 1, min ( k-1, last )
       s = s + w(ip,j) * x(j)
     end do
 
     x(k) = b(ip) - s
 
   end do
 !
 !  Transfer modified right sides of equations ipivot(last+1),...,
 !  ipivot(nrow) to next block.
 !
   do k = last+1, nrow
     b(nrow-last+k) = x(k)
   end do
 
   return
 end
diff --git a/pppack/tautsp.f90 b/pppack/tautsp.f90
index d585f07..4cb0bae 100644
--- a/pppack/tautsp.f90
+++ b/pppack/tautsp.f90
@@ -1,530 +1,530 @@
 !>
 !> @file tautsp.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine tautsp ( tau, gtau, ntau, gamma, s, break, coef, l, k, iflag )
 
 !*************************************************************************
 !
 !! TAUTSP constructs a cubic spline interpolant to given data.
 !
 !  Discussion:
 !
 !    If 0 < GAMMA, additional knots are introduced where needed to
 !    make the interpolant more flexible locally.  This avoids extraneous
 !    inflection points typical of cubic spline interpolation at knots to
 !    rapidly changing data.
 !
 !  Method:
 !
 !    On the I-th interval, (TAU(I), TAU(I+1)), the interpolant is of the
 !    form:
 !
 !    (*)  f(u(x))=a+b*u + c*h(u,z) + d*h(1-u,1-z) ,
 !
 !    with
 !
 !      U = U(X) = ( X - TAU(I) ) / DTAU(I).
 !
 !    Here,
 !       z=z(i) = addg(i+1)/(addg(i)+addg(i+1))
 !  (= .5, in case the denominator vanishes). with
 !       addg(j)=abs(ddg(j)), ddg(j) = dg(j+1)-dg(j),
 !       dg(j)=divdif ( j) = (gtau(j+1)-gtau(j))/dtau(j)
 !  and
 !       h(u,z)=alpha*u**3+(1-alpha)*(max(((u-zeta)/(1-zeta)),0)**3
 !  with
 !       alpha(z)=(1-gamma/3)/zeta
 !       zeta(z)=1-gamma*min((1 - z), 1/3)
 !  thus, for 1/3 <= z .le. 2/3,  f  is just a cubic polynomial on
 !  the interval i. otherwise, it has one additional knot, at
 !         tau(i)+zeta*dtau(i) .
 !  as  z  approaches  1, h(.,z) has an increasingly sharp bend  near 1,
 !  thus allowing  f  to turn rapidly near the additional knot.
 !     in terms of f(j)=gtau(j) and
 !       fsecnd(j)= second derivative of  f  at  tau(j),
 !  the coefficients for (*) are given as
 !       a=f(i)-d
 !       b=(f(i+1)-f(i)) - (c - d)
 !       c=fsecnd(i+1)*dtau(i)**2/hsecnd(1,z)
 !       d=fsecnd(i)*dtau(i)**2/hsecnd(1,1-z)
 !  hence can be computed once fsecnd(i),i=1,...,ntau, is fixed.
 !
 !    F is automatically continuous and has a continuous second derivative
 !    (except when z=0 or 1 for some i). we determine fscnd(.) from
 !    the requirement that also the first derivative of F be continuous.
 !
 !    In addition, we require that the third derivative be continuous
 !    across TAU(2) and across TAU(NTAU-1).  This leads to a strictly
 !    diagonally dominant tridiagonal linear system for the fsecnd(i)
 !    which we solve by Gauss elimination without pivoting.
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Input, real ( kind = 8 ) TAU(NTAU), the sequence of data points.
 !    TAU must be strictly increasing.
 !
 !    Input, real ( kind = 8 ) GTAU(NTAU), the corresponding sequence of
 !    function values.
 !
 !    Input, integer NTAU, the number of data points.  NTAU must be at least 4.
 !
 !    Input, gamma  indicates whether additional flexibility is desired.
 !        =0., no additional knots
 !         in (0.,3.), under certain conditions on the given data at
 !                points i-1, i, i+1, and i+2, a knot is added in the
 !                i-th interval, i=2,...,ntau-2. see description of meth-
 !                od below. the interpolant gets rounded with increasing
 !                gamma. a value of  2.5  for gamma is typical.
 !          in (3.,6.), same , except that knots might also be added in
 !                intervals in which an inflection point would be permit-
 !                ted.  a value of  5.5  for gamma is typical.
 !
 !    Output, break, coef, l, k  give the pp-representation of the interpolant.
 !          specifically, for break(i) <= x .le. break(i+1), the
 !        interpolant has the form
 !  f(x)=coef(1,i) +dx(coef(2,i) +(dx/2)(coef(3,i) +(dx/3)coef(4,i)))
 !        with  dx=x-break(i) and i=1,...,l .
 !
 !    Output, iflag =1, ok
 !        =2, input was incorrect. a printout specifying the mistake
 !            was made.
 !            workspace
 !
 !    Output, s     is required, of size (ntau,6). the individual columns of this
 !        array contain the following quantities mentioned in the write-
 !        up and below.
 !     s(.,1)=dtau = tau(.+1)-tau
 !     s(.,2)=diag = diagonal in linear system
 !     s(.,3)=u = upper diagonal in linear system
 !     s(.,4)=r = right side for linear system (initially)
 !          = fsecnd = solution of linear system , namely the second
 !                       derivatives of interpolant at  tau
 !     s(.,5)=z = indicator of additional knots
 !     s(.,6)=1/hsecnd(1,x) with x = z or = 1-z. see below.
 !
   implicit none
 
   integer ntau
 
   real ( kind = 8 ) alph
   real ( kind = 8 ) alpha
   real ( kind = 8 ) break(*)
   real ( kind = 8 ) c
   real ( kind = 8 ) coef(4,*)
   real ( kind = 8 ) d
   real ( kind = 8 ) del
   real ( kind = 8 ) denom
   real ( kind = 8 ) divdif
   real ( kind = 8 ) entry
   real ( kind = 8 ) entry3
   real ( kind = 8 ) factor
   real ( kind = 8 ) factr2
   real ( kind = 8 ) gam
   real ( kind = 8 ) gamma
   real ( kind = 8 ) gtau(ntau)
   integer i
   integer iflag
   integer k
   integer l
   integer method
   real ( kind = 8 ) onemg3
   real ( kind = 8 ) onemzt
   real ( kind = 8 ) ratio
   real ( kind = 8 ) s(ntau,6)
   real ( kind = 8 ) sixth
   real ( kind = 8 ) tau(ntau)
   real ( kind = 8 ) temp
   real ( kind = 8 ) x
   real ( kind = 8 ) z
   real ( kind = 8 ) zeta
   real ( kind = 8 ) zt2
 
   alph(x) = min ( 1.0D+00, onemg3 / x )
 !
 !  There must be at least 4 interpolation points.
 !
   if ( ntau < 4 ) then
     write ( *, '(a)' ) ' '
     write ( *, '(a)' ) 'TAUTSP - Fatal error!'
     write ( *, '(a)' ) '  Input NTAU must be at least 4.'
     write ( *, '(a,i6)' ) '  NTAU = ', ntau
     iflag = 2
     stop
   end if
 !
 !  Construct delta tau and first and second (divided) differences of data.
 !
   do i = 1, ntau-1
 
     s(i,1) = tau(i+1)-tau(i)
 
     if ( s(i,1) <= 0.0D+00 ) then
       write(*,30)i,tau(i),tau(i+1)
    30     format (' point ',i3,' and the next',2e15.6,' are disordered')
       iflag=2
       return
     end if
 
     s(i+1,4) = ( gtau(i+1) - gtau(i) ) / s(i,1)
   end do
 
   do i = 2, ntau-1
     s(i,4) = s(i+1,4)-s(i,4)
   end do
 !
 !  Construct system of equations for second derivatives at  tau. at each
 !  interior data point, there is one continuity equation, at the first
 !  and the last interior data point there is an additional one for a
 !  total of NTAU equations in  ntau  unknowns.
 !
   i = 2
   s(2,2) = s(1,1) / 3.0D+00
   sixth = 1.0D+00 / 6.0D+00
   method = 2
   gam = gamma
 
   if ( gam <= 0.0D+00 ) then
     method=1
   end if
 
   if ( 3.0D+00 < gam ) then
     method = 3
     gam = gam - 3.0D+00
   end if
 
   onemg3 = 1.0D+00 - gam / 3.0D+00
 !
 !  loop over i
 !
    70 continue
 !
 !  Construct z(i) and zeta(i)
 !
   z = 0.5D+00
 
   if ( method == 1) then
     go to 100
   end if
 
   if ( method == 3) then
     go to 90
   end if
 
   if ( s(i,4)*s(i+1,4) < 0.0D+00 ) then
     go to 100
   end if
 
    90 continue
 
   temp = abs ( s(i+1,4) )
   denom = abs ( s(i,4) ) +temp
 
   if ( denom /= 0.0D+00 ) then
     z = temp/denom
     if ( abs ( z - 0.5D+00 ) <= sixth ) then
       z=0.5D+00
     end if
   end if
 
   100 continue
 
   s(i,5) = z
 !
 !  Set up part of the i-th equation which depends on the i-th interval.
 !
   if ( z < 0.5D+00 ) then
 
     zeta = gam*z
     onemzt = 1.0D+00 - zeta
     zt2 = zeta**2
     alpha = alph(onemzt)
     factor = zeta/(alpha*(zt2-1.0D+00 ) + 1.0D+00 )
     s(i,6) = zeta*factor / 6.0D+00
     s(i,2) = s(i,2) + s(i,1) &
       * ( ( 1.0D+00 - alpha * onemzt ) * factor / 2.0D+00-s(i,6))
 !
 !  If z=0 and the previous z = 1, then d(i) = 0. since then
 !  also u(i-1)=l(i+1) = 0, its value does not matter. reset
 !  d(i)=1 to insure nonzero pivot in elimination.
 !
     if ( s(i,2) <= 0.0D+00 ) then
       s(i,2) = 1.0D+00
     end if
 
     s(i,3)=s(i,1) / 6.0D+00
   else if ( z - 0.5D+00 == 0.0D+00 ) then
     s(i,2)=s(i,2)+s(i,1) / 3.0D+00
     s(i,3)=s(i,1) / 6.0D+00
   else if ( 0.0D+00 < z - 0.5D+00 ) then
     onemzt = gam*(1.0D+00 - z)
     zeta = 1.0D+00 - onemzt
     alpha = alph(zeta)
     factor = onemzt/(1.0D+00 - alpha * zeta * ( 1.0D+00 + onemzt ) )
     s(i,6) = onemzt*factor / 6.0D+00
     s(i,2) = s(i,2)+s(i,1) / 3.0D+00
     s(i,3) = s(i,6)*s(i,1)
   end if
 
   if ( 2 < i ) then
     go to 190
   end if
 
   s(1,5) = 0.5D+00
 !
 !  The first two equations enforce continuity of the first and of
 !  the third derivative across tau(2).
 !
   s(1,2)=s(1,1) / 6.0D+00
   s(1,3)=s(2,2)
   entry3=s(2,3)
   if ( z-0.5D+00) 150, 160, 170
 
   150 continue
 
   factr2 = zeta * ( alpha * ( zt2 - 1.0D+00 ) + 1.0D+00 ) &
     / ( alpha * ( zeta * zt2 - 1.0D+00 ) + 1.0D+00 )
 
   ratio=factr2*s(2,1)/s(1,2)
   s(2,2)=factr2*s(2,1)+s(1,1)
   s(2,3)=-factr2*s(1,1)
   go to 180
 
   160 continue
 
   ratio=s(2,1)/s(1,2)
   s(2,2)=s(2,1)+s(1,1)
   s(2,3)=-s(1,1)
   go to 180
 
   170 continue
 
   ratio=s(2,1)/s(1,2)
   s(2,2)=s(2,1)+s(1,1)
   s(2,3)=-s(1,1)*6.0D+00 * alpha * s(2,6)
 !
 !  At this point, the first two equations read
 !              diag(1)*x1+u(1)*x2 + entry3*x3=r(2)
 !       -ratio*diag(1)*x1+diag(2)*x2 + u(2)*x3=0.0
 !  Eliminate first unknown from second equation
 !
   180 continue
 
   s(2,2)=ratio*s(1,3)+s(2,2)
   s(2,3)=ratio*entry3+s(2,3)
   s(1,4)=s(2,4)
   s(2,4)=ratio*s(1,4)
   go to 200
 
   190 continue
 !
 !  The i-th equation enforces continuity of the first derivative
 !  across tau(i). it has been set up in statements 35 up to 40
 !  and 21 up to 25 and reads now
 !    -ratio*diag(i-1)*xi-1+diag(i)*xi + u(i)*xi+1=r(i) .
 !  eliminate (i-1)st unknown from this equation
 !
   s(i,2)=ratio*s(i-1,3)+s(i,2)
   s(i,4)=ratio*s(i-1,4)+s(i,4)
 !
 !  Set up the part of the next equation which depends on the
 !  i-th interval.
 !
   200 continue
 
   if ( z- 0.5D+00 ) 210, 220, 230
 
   210 continue
   ratio = -s(i,6) * s(i,1) / s(i,2)
   s(i+1,2)=s(i,1) / 3.0D+00
   go to 240
 
   220 continue
   ratio=-(s(i,1) / 6.0D+00 ) / s(i,2)
   s(i+1,2)=s(i,1) / 3.0D+00
   go to 240
 
   230 continue
   ratio=-( s(i,1) / 6.0D+00 )/s(i,2)
   s(i+1,2)=s(i,1)*((1.0D+00-zeta*alpha) * factor / 2.0D+00 - s(i,6) )
 !
 !  end of i loop
 !
   240 continue
 
   i=i+1
   if ( i < ntau-1) then
     go to 70
   end if
 
   s(i,5) = 0.5D+00
 !
 !  The last two equations enforce continuity of third derivative and
 !  of first derivative across  tau(ntau-1).
 !
   entry=ratio*s(i-1,3)+s(i,2)+s(i,1)/3.0D+00
   s(i+1,2)=s(i,1)/6.0D+00
   s(i+1,4)=ratio*s(i-1,4)+s(i,4)
   if ( z- 0.5D+00 ) 250, 260, 270
 
   250 continue
 
   ratio = s(i,1) * 6.0D+00 * s(i-1,6) * alpha / s(i-1,2)
   s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1)
   s(i,3)=-s(i-1,1)
   go to 280
 
   260 continue
 
   ratio=s(i,1)/s(i-1,2)
   s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1)
   s(i,3)=-s(i-1,1)
   go to 280
 
   270 continue
 
   factr2=onemzt*(alpha*(onemzt**2-1.0D+00)+1.0D+00) &
     /(alpha*(onemzt**3-1.0D+00)+1.0D+00)
 
   ratio = factr2*s(i,1) / s(i-1,2)
   s(i,2)=ratio*s(i-1,3)+factr2*s(i-1,1)+s(i,1)
   s(i,3)=-factr2*s(i-1,1)
 !
 !  At this point, the last two equations read:
 !
 !           diag(i)*xi+     u(i)*xi+1=r(i)
 !    -ratio*diag(i)*xi+diag(i+1)*xi+1=r(i+1)
 !
 !  Eliminate XI from the last equation.
 !
   280 continue
 
   s(i,4)=ratio*s(i-1,4)
   ratio=-entry/s(i,2)
   s(i+1,2)=ratio*s(i,3)+s(i+1,2)
   s(i+1,4)=ratio*s(i,4)+s(i+1,4)
 !
 !  Back substitution.
 !
   s(ntau,4) = s(ntau,4) / s(ntau,2)
 
   290 continue
 
   s(i,4)=(s(i,4)-s(i,3)*s(i+1,4))/s(i,2)
   i=i-1
   if ( 1 < i ) then
     go to 290
   end if
 
   s(1,4)=(s(1,4)-s(1,3)*s(2,4)-entry3*s(3,4))/s(1,2)
 !
 !  Construct polynomial pieces.
 !
   break(1)=tau(1)
   l=1
 
   do i=1, ntau-1
     coef(1,l)=gtau(i)
     coef(3,l)=s(i,4)
     divdif=(gtau(i+1)-gtau(i))/s(i,1)
     z=s(i,5)
     if ( z- 0.5D+00 ) 300, 310, 320
 
   300   continue
 
     if ( z == 0.0D+00 ) go to 330
     zeta=gam*z
     onemzt=1.0D+00-zeta
     c=s(i+1,4) / 6.0D+00
     d=s(i,4)*s(i,6)
     l=l+1
     del=zeta*s(i,1)
     break(l)=tau(i)+del
     zt2=zeta**2
     alpha=alph(onemzt)
     factor=onemzt**2*alpha
     coef(1,l)=gtau(i)+divdif*del+s(i,1)**2*(d*onemzt*(factor-1.0D+00) &
       +c*zeta*(zt2-1.0D+00))
     coef(2,l)=divdif+s(i,1)*(d*(1.0D+00-3.0D+00*factor)+c*(3.0D+00*zt2-1.0D+00))
     coef(3,l)=6.0D+00*(d*alpha*onemzt+c*zeta)
     coef(4,l)=6.0D+00*(c-d*alpha)/s(i,1)
     coef(4,l-1)=coef(4,l)-6.0D+00*d*(1.0D+00-alpha)/(del*zt2)
     coef(2,l-1)=coef(2,l)-del*(coef(3,l)-(del/2.0D+00)*coef(4,l-1))
     go to 340
 
   310   continue
 
     coef(2,l) = divdif - s(i,1) * ( 2.0D+00 * s(i,4) + s(i+1,4) ) / 6.0D+00
     coef(4,l)=(s(i+1,4)-s(i,4))/s(i,1)
     go to 340
 
   320 continue
 
     onemzt=gam*(1.0D+00-z)
 
     if ( onemzt == 0.0D+00 ) then
       go to 330
     end if
 
     zeta = 1.0D+00 - onemzt
     alpha=alph(zeta)
     c=s(i+1,4)*s(i,6)
     d=s(i,4)/6.0D+00
     del=zeta*s(i,1)
     break(l+1)=tau(i)+del
     coef(2,l)=divdif-s(i,1)*(2.0D+00*d+c)
     coef(4,l)=6.0D+00*(c*alpha-d)/s(i,1)
     l=l+1
     coef(4,l)=coef(4,l-1)+6.0D+00*(1.0D+00-alpha)*c/(s(i,1)*onemzt**3)
     coef(3,l)=coef(3,l-1)+del*coef(4,l-1)
     coef(2,l)=coef(2,l-1)+del*(coef(3,l-1)+(del/2.0D+00)*coef(4,l-1))
     coef(1,l)=coef(1,l-1)+del*(coef(2,l-1)+(del/2.0D+00)*(coef(3,l-1) &
       +(del/3.0D+00)*coef(4,l-1)))
     go to 340
 
   330   continue
 
     coef(2,l) = divdif
     coef(3,l) = 0D+00
     coef(4,l) = 0.0D+00
 
   340   continue
 
     l = l + 1
     break(l) = tau(i+1)
 
   end do
 
   l = l - 1
   k = 4
   iflag = 1
 
   return
 end
diff --git a/pppack/titanium.f90 b/pppack/titanium.f90
index 9879a5f..162e461 100644
--- a/pppack/titanium.f90
+++ b/pppack/titanium.f90
@@ -1,88 +1,88 @@
 !>
 !> @file titanium.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 subroutine titanium ( n, t, g )
 
 !***********************************************************************
 !
 !! TITANIUM represents a temperature dependent property of titanium.
 !
 !  Discussion:
 !
 !    The data has been used extensively as an example in spline
 !    approximation with variable knots.
 !
 !  Modified:
 !
 !    20 November 2000
 !
 !  Reference:
 !
 !    Carl DeBoor,
 !    A Practical Guide to Splines,
 !    Springer Verlag.
 !
 !  Parameters:
 !
 !    Output, integer N, the number of data points, which is 49.
 !
 !    Output, real ( kind = 8 ) T(N), the location of the data points.
 !
 !    Output, real ( kind = 8 ) G(N), the value associated with the data points.
 !
   implicit none
 
   real ( kind = 8 ) g(*)
   integer n
   real ( kind = 8 ) t(*)
 
   n = 49
 
   t(1:49) = (/ &
      595.0D+00,  605.0D+00,  615.0D+00,  625.0D+00,  635.0D+00, &
      645.0D+00,  655.0D+00,  665.0D+00,  675.0D+00,  685.0D+00, &
      695.0D+00,  705.0D+00,  715.0D+00,  725.0D+00,  735.0D+00, &
      745.0D+00,  755.0D+00,  765.0D+00,  775.0D+00,  785.0D+00, &
      795.0D+00,  805.0D+00,  815.0D+00,  825.0D+00,  835.0D+00, &
      845.0D+00,  855.0D+00,  865.0D+00,  875.0D+00,  885.0D+00, &
      895.0D+00,  905.0D+00,  915.0D+00,  925.0D+00,  935.0D+00, &
      945.0D+00,  955.0D+00,  965.0D+00,  975.0D+00,  985.0D+00, &
      995.0D+00, 1005.0D+00, 1015.0D+00, 1025.0D+00, 1035.0D+00, &
     1045.0D+00, 1055.0D+00, 1065.0D+00, 1075.0D+00 /)
 
   g(1:49) = (/ &
     0.644D+00, 0.622D+00, 0.638D+00, 0.649D+00, 0.652D+00, &
     0.639D+00, 0.646D+00, 0.657D+00, 0.652D+00, 0.655D+00, &
     0.644D+00, 0.663D+00, 0.663D+00, 0.668D+00, 0.676D+00, &
     0.676D+00, 0.686D+00, 0.679D+00, 0.678D+00, 0.683D+00, &
     0.694D+00, 0.699D+00, 0.710D+00, 0.730D+00, 0.763D+00, &
     0.812D+00, 0.907D+00, 1.044D+00, 1.336D+00, 1.881D+00, &
     2.169D+00, 2.075D+00, 1.598D+00, 1.211D+00, 0.916D+00, &
     0.746D+00, 0.672D+00, 0.627D+00, 0.615D+00, 0.607D+00, &
     0.606D+00, 0.609D+00, 0.603D+00, 0.601D+00, 0.603D+00, &
     0.601D+00, 0.611D+00, 0.601D+00, 0.608D+00 /)
 
   return
 end
diff --git a/pputils2/CMakeLists.txt b/pputils2/CMakeLists.txt
index 1b0c5d6..a0cd0e2 100644
--- a/pputils2/CMakeLists.txt
+++ b/pputils2/CMakeLists.txt
@@ -1,74 +1,74 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 project(pputils2)
 
 set(SRCS
   pputils2.f90
 )
 
 set(CMAKE_Fortran_MODULE_DIRECTORY
   ${CMAKE_CURRENT_BINARY_DIR}/modules
   )
 
 add_library(pputils2 STATIC ${SRCS})
 target_include_directories(pputils2
   PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/modules>
           ${MPI_Fortran_INCLUDE_PATH}
   INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/modules>
             $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
             ${MPI_Fortran_INCLUDE_PATH}
   )
 
 target_compile_options(pputils2 PUBLIC ${MPI_Fortran_COMPILE_FLAGS})
 target_link_libraries(pputils2 PUBLIC ${MPI_Fortran_LIBRARIES})
 
 set_property(TARGET pputils2
   PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/pputils.mod)
 
 include(GNUInstallDirs)
 install(TARGETS pputils2
   EXPORT ${BSPLINES_EXPORT_TARGETS}
   LIBRARY DESTINATION ${CNAKE_INSTALL_LIBDIR}
   ARCHIVE DESTINATION ${CNAKE_INSTALL_LIBDIR}
   PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
   )
 
 if(BSPLINES_EXAMPLES)
   set(EXAMPLES ex1 ex2 ex3 ex4 ex5 ex6 ex7)
   foreach(ex ${EXAMPLES})
     add_executable(pputils2_${ex} ${ex}.f90)
     target_link_libraries(pputils2_${ex} pputils2 futils)
   endforeach()
 
   add_test(ex1 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex1)
   add_test(ex2 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 9 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex2)
   add_test(ex3 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 5 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex3)
   add_test(ex4 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex4)
   add_test(ex5 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 8 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex5)
   add_test(ex6 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex6)
   add_test(ex7 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 6 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex7)
 endif()
diff --git a/pputils2/Makefile b/pputils2/Makefile
index b0f3ddd..ad0e537 100644
--- a/pputils2/Makefile
+++ b/pputils2/Makefile
@@ -1,105 +1,105 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 F90 = mpiifort
 CC = cc
 
 debug = -g -traceback -CB
 optim = -O3 -xSSE4.2
 
 #OPT=$(debug)
 OPT=$(optim)
 
 F90FLAGS =  $(OPT) -I. -I$(FUTILS)/include -I${HDF5}/lib
 CFLAGS = -O2
 LDFLAGS = $(OPT) -fPIC -L. -L$(FUTILS)/lib -L${HDF5}/lib
 LIBS =  -lfutils pputils2.o -lhdf5_fortran -lhdf5 -lz
 
 .SUFFIXES:
 .SUFFIXES: .o .c .f90
 
 .f90.o:
 	$(F90) $(F90FLAGS) -c $<
 
 all:	ex1 ex2 ex3 ex4 ex5 ex6 ex7
 
 lib:    libpputils2.a
 
 libpputils2.a: pputils2.o
 	xiar r $@ $?
 	ranlib $@
 
 ex1:	ex1.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 ex2:	ex2.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 ex3:	ex3.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 ex4:	ex4.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 ex5:	ex5.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 ex6:	ex6.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 ex7:	ex7.o
 	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
 
 tests:	ex1 ex2 ex3 ex4 ex5 ex6 ex7
 	@echo ==== Running ex1 ======
 	@mpiexec -n 4 ./ex1
 	@echo ==== Running ex2 ======
 	@mpiexec -n 9 ./ex2
 	@echo ==== Running ex3 ======
 	@mpiexec -n 5 ./ex3
 	@echo ==== Running ex4 ======
 	@mpiexec -n 12 ./ex4
 	@echo ==== Running ex5 ======
 	@mpiexec -n 8 ./ex5
 	@echo ==== Running ex6 ======
 	@mpiexec -n 12 ./ex6
 	@echo ==== Running ex7 ======
 	@mpiexec -n 6 ./ex7
 
 ex1.o: pputils2.o
 ex2.o: pputils2.o
 ex3.o: pputils2.o
 ex4.o: pputils2.o
 ex5.o: pputils2.o
 ex6.o: pputils2.o
 ex7.o: pputils2.o
 
 tags:
 	etags *.f90 $(FUTILS)/futils.f90
 
 clean:
 	rm -f *.o *~ a.out
 distclean: clean
 	rm -f ex1 ex2 ex3 ex4 ex5 ex6 ex7 *.h5 *.a *.mod
diff --git a/pputils2/ex1.f90 b/pputils2/ex1.f90
index 46dcca6..5d1fb27 100644
--- a/pputils2/ex1.f90
+++ b/pputils2/ex1.f90
@@ -1,113 +1,113 @@
 !>
 !> @file ex1.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of 2d matrix partitionned on a 1d proc grid:
 !     - A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1)
 !
   USE pputils2
   USE futils
   IMPLICIT NONE
   INCLUDE "mpif.h"
   CHARACTER(len=32) :: file='ex1.h5'
   INTEGER :: fid
 !
   INTEGER, PARAMETER :: ndims=1              ! N. of dims of proc. grid
   INTEGER :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart
 !
   INTEGER ::  n1, n2, n1p, n2p
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr, b
   DOUBLE PRECISION :: x
   INTEGER :: i, j, iglob, jglob, kerrors, nerrors
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Create cartesian topololy
   dims    = npes
   periods = (/.FALSE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
      END IF
   END IF
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
 !
 !   Define local array
   n1p=2; n1=n1p*dims(1)
   n2p=2; n2=n2p*dims(1)
   ALLOCATE( a(n1,n2p), atr(n2,n1p), b(n1,n2p) )
   a = 0
   atr = 0
   b   = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = coords(1)*n2p + j
         a(i,j) = 10*i + jglob
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2
   END IF
 !
 !   Tranpose A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1)
   CALL pptransp(cart, a, atr)
   CALL pptransp(cart, atr, b)
 !
 !   Check ATR
   kerrors = 0
   DO i=1,n1p
      iglob = coords(1)*n1p + i
      DO j=1,n2
         x = 10*iglob + j
         IF( x .NE. atr(j,i) ) kerrors = kerrors+1
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors
 !
 !   Write to file
 !
   CALL creatf(file, fid, mpicomm=cart)
   CALL putarrnd(fid, '/arraya', a, (/2/) )
   CALL putarrnd(fid, '/arrayat', atr, (/2/) )
   CALL putarrnd(fid, '/arrayb', b, (/2/) )
 !
 !   Clean up and quit
   DEALLOCATE(a, atr)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/pputils2/ex2.f90 b/pputils2/ex2.f90
index 8b8d1c2..f951468 100644
--- a/pputils2/ex2.f90
+++ b/pputils2/ex2.f90
@@ -1,170 +1,170 @@
 !>
 !> @file ex2.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of matrices partitionned on a 2d proc grid:
 !     - A(n1,n2/P1,n3/P2)    -> AT(n3,n2/P1,n1/P2)
 !     - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2)
 !
   USE pputils2
   USE futils
   IMPLICIT NONE
   INCLUDE "mpif.h"
   CHARACTER(len=32) :: file='ex2.h5'
   INTEGER :: fid
 !
   INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
   INTEGER :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart, cartcol, cartrow
 !
   INTEGER ::  n1, n2, n3, n4, n1p, n2p, n3p, n4p
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
   DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t
   DOUBLE PRECISION :: x
   INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Create cartesian topololy
   dims    = (/3, 3/)
   periods = (/.FALSE., .TRUE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
      END IF
   END IF
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
   CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
   CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
 !
 !   Define local array A3
   n1p=2; n1=n1p*dims(2)
   n2p=4; n2=n2p*dims(1)
   n3p=3; n3=n3p*dims(2)
   ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) )
   a3 = 0
   a3t = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = coords(1)*n2p + j
         DO k=1,n3p
            kglob = coords(2)*n3p + k
            a3(i,j,k) = 10000*i + 100*jglob + kglob
         END DO
      END DO
   END DO
   IF( me .EQ. 0 ) THEN
      WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3
   END IF
 !
 !   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
   CALL pptransp(cartrow, a3, a3t, 1, 3)
 !
 !   Check A3T
   kerrors = 0
   DO i=1,n1p
      iglob = coords(2)*n1p + i
      DO j=1,n2p
         jglob = coords(1)*n2p + j
         DO k=1,n3
            x = 10000*iglob + 100*jglob + k
            IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
 !
 !   Define local array B4
   n1p=2; n1=n1p*dims(2)
   n2p=4; n2=n2p*dims(1)
   n3p=3; n3=n3p*dims(1)
   n4p=3; n4=n4p*dims(2)
   ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) )
   b4 = 0
   b4t = 0
   DO i=1,n1
      DO j=1,n2
         DO k=1,n3p
            kglob = coords(1)*n3p + k
            DO l=1,n4p
               lglob = coords(2)*n4p + l
               b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
   END IF
 !
 !   Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2)
   CALL pptransp(cartrow, b4, b4t, 1, 4)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check B4T
   kerrors = 0
   DO i=1,n1p
      iglob = coords(2)*n1p + i
      DO j=1,n2
         DO k=1,n3p
            kglob = coords(1)*n3p + k
            DO l=1,n4
               x = 1000000*iglob + 10000*j + 100*kglob + l
               IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
 !
 !   Write to file
 !
   CALL creatf(file, fid, mpicomm=cart)
   CALL putarrnd(fid, '/a3' , a3, (/2,3/) )
   CALL putarrnd(fid, '/a3t', a3t,(/2,3/) )
   CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
   CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
 
 !   Clean up and quit
   DEALLOCATE(a3, a3t)
   DEALLOCATE(b4, b4t)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/pputils2/ex3.f90 b/pputils2/ex3.f90
index dabf46b..04de1bc 100644
--- a/pputils2/ex3.f90
+++ b/pputils2/ex3.f90
@@ -1,111 +1,111 @@
 !>
 !> @file ex3.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of 2d matrix partitionned on a 1d proc grid
 !     - A(n1,n2/P1) -> AT(n2,n1/P1)
 !   n1, n2 NOT REQUIRED to be divided evenly by NPES
 !
   USE pputils2
   USE futils
   IMPLICIT NONE
   INCLUDE "mpif.h"
   CHARACTER(len=32) :: file='ex3.h5'
   INTEGER :: fid
 !
   INTEGER, PARAMETER :: ndims=1              ! N. of dims of proc. grid
   INTEGER :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart
 !
   INTEGER ::  n1=9, n2=8, s1, s2, n1p, n2p
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr
   DOUBLE PRECISION :: x
   INTEGER :: i, j, iglob, jglob, kerrors, nerrors
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Create cartesian topololy
   dims    = npes
   periods = (/.FALSE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
      END IF
   END IF
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
 !
 !   Partition array
   CALL dist1d(cart, 0, n1, s1, n1p)
   CALL dist1d(cart, 0, n2, s2, n2p)
   ALLOCATE( a(n1,n2p), atr(n2,n1p) )
   a = 0
   atr = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         a(i,j) = 10*i + jglob
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2
   END IF
 !
 !   Tranpose A(n1,n2/P1) -> ATR(n2,n1/P1)
   CALL pptransp(cart, a, atr)
 !
 !   Check ATR
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         x = 10*iglob + j
         IF( x .NE. atr(j,i) ) kerrors = kerrors+1
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors
 !
 !   Write to file
 !
   CALL creatf(file, fid, mpicomm=cart)
   CALL putarrnd(fid, '/arraya', a, (/2/) )
   CALL putarrnd(fid, '/arrayat', atr, (/2/) )
 !
 !   Clean up and quit
   DEALLOCATE(a, atr)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/pputils2/ex4.f90 b/pputils2/ex4.f90
index 1f75216..1ab7721 100644
--- a/pputils2/ex4.f90
+++ b/pputils2/ex4.f90
@@ -1,171 +1,171 @@
 !>
 !> @file ex4.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of matrices partitionned on a 2d proc grid:
 !     - A(n1,n2/P1,n3/P2)    -> AT(n3,n2/P1,n1/P2)
 !     - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2)
 !   n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES
 !
   USE pputils2
   USE futils
   IMPLICIT NONE
   INCLUDE "mpif.h"
   CHARACTER(len=32) :: file='ex4.h5'
   INTEGER :: fid
 !
   INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
   INTEGER :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart, cartcol, cartrow
 !
   INTEGER ::  n1=15, n2=10, n3=9, n4=8, n1p, n2p, n3p, n4p, s1, s2, s3, s4
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
   DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t
   DOUBLE PRECISION :: x
   INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Create cartesian topololy
   dims    = (/4, 3/)
   periods = (/.FALSE., .TRUE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
      END IF
   END IF
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
   CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
   CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
 !
 !   Define local array A3
   CALL dist1d(cartrow, 0, n1, s1, n1p)
   CALL dist1d(cartcol, 0, n2, s2, n2p)
   CALL dist1d(cartrow, 0, n3, s3, n3p)
   ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) )
   a3 = 0
   a3t = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3p
            kglob = s3 + k
            a3(i,j,k) = 10000*i + 100*jglob + kglob
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3
   END IF
 !
 !   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
   CALL pptransp(cartrow, a3, a3t, 1, 3)
 !
 !   Check A3T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3
            x = 10000*iglob + 100*jglob + k
            IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
 !
 !   Define local array B4
   CALL dist1d(cartrow, 0, n1, s1, n1p)
   CALL dist1d(cartcol, 0, n3, s3, n3p)
   CALL dist1d(cartrow, 0, n4, s4, n4p)
   ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) )
   b4 = 0
   b4t = 0
   DO i=1,n1
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4p
               lglob = s4 + l
               b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
   END IF
 !
 !   Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2)
 !!$  CALL pptransp(cartrow, b4, b4t)
   CALL pptransp(cartrow, b4, b4t, 1, 4)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check B4T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4
               x = 1000000*iglob + 10000*j + 100*kglob + l
               IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
 !
 !   Write to file
 !
   CALL creatf(file, fid, mpicomm=cart)
   CALL putarrnd(fid, '/a3' , a3, (/2,3/) )
   CALL putarrnd(fid, '/a3t', a3t,(/2,3/) )
   CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
   CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
 
 !   Clean up and quit
   DEALLOCATE(a3, a3t)
   DEALLOCATE(b4, b4t)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/pputils2/ex5.f90 b/pputils2/ex5.f90
index 181a2dc..3fc88b1 100644
--- a/pputils2/ex5.f90
+++ b/pputils2/ex5.f90
@@ -1,221 +1,221 @@
 !>
 !> @file ex5.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of matrices partitionned on a 2d proc grid:
 !     - A(n1,n2/P1,n3/P2)    -> AT(n2,n1/P1,n3/P2)
 !     - B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2)
 !     - C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2)
 !   n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES
 !
   USE pputils2
   USE futils
   IMPLICIT NONE
   INCLUDE "mpif.h"
   CHARACTER(len=32) :: file='ex4.h5'
   INTEGER :: fid
 !
   INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
   INTEGER :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart, cartcol, cartrow
 !
   INTEGER ::  n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
   DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t
   DOUBLE PRECISION :: x
   INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Create cartesian topololy
   dims    = (/4, 2/)
   periods = (/.FALSE., .TRUE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
      END IF
   END IF
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
   CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
   CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
 !
 !   Define local array A3
   CALL dist1d(cartcol, 0, n1, s1, n1p)
   CALL dist1d(cartcol, 0, n2, s2, n2p)
   CALL dist1d(cartrow, 0, n3, s3, n3p)
   ALLOCATE( a3(n1,n2p,n3p), a3t(n2,n1p,n3p) )
   a3 = 0
   a3t = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3p
            kglob = s3 + k
            a3(i,j,k) = 10000*i + 100*jglob + kglob
         END DO
      END DO
   END DO
   IF( me .EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3
   END IF
 !
 !   Tranpose A(n1,n2/P1,n3/P2) -> AT(n2,n1/P1,n3/P2)
   CALL pptransp(cartcol, a3, a3t, 1, 2)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check A3T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            x = 10000*iglob + 100*j + kglob
            IF( x .NE. a3t(j,i,k) ) kerrors = kerrors+1
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
 !
 !   Define local array B4
   CALL dist1d(cartcol, 0, n1, s1, n1p)
   CALL dist1d(cartcol, 0, n3, s3, n3p)
   CALL dist1d(cartrow, 0, n4, s4, n4p)
   ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n3,n2,n1p,n4p) )
   b4 = 0
   b4t = 0
   DO i=1,n1
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4p
               lglob = s4 + l
               b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me .EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
   END IF
 !
 !   Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2)
   CALL pptransp(cartcol, b4, b4t, 1, 3)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check B4T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         DO k=1,n3
            DO l=1,n4p
               lglob = s4 + l
               x = 1000000*iglob + 10000*j + 100*k + lglob
               IF( x .NE. b4t(k,j,i,l) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
 !
 !   Define local array C4
   CALL dist1d(cartcol, 0, n1, s1, n1p)
   CALL dist1d(cartcol, 0, n2, s2, n2p)
   CALL dist1d(cartrow, 0, n4, s4, n4p)
   ALLOCATE( c4(n1,n2p,n3,n4p), c4t(n2,n1p,n3,n4p) )
   c4 = 0
   c4t = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3
            DO l=1,n4p
               lglob = s4 + l
               c4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4
   END IF
 !
 !   Tranpose C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2)
   CALL pptransp(cartcol, c4, c4t, 1, 2)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check C4T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         DO k=1,n3
            DO l=1,n4p
               lglob = s4 + l
               x = 1000000*iglob + 10000*j + 100*k + lglob
               IF( x .NE. c4t(j,i,k,l) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors
 !
 !   Write to file
 !
   CALL creatf(file, fid, mpicomm=cart)
   CALL putarrnd(fid, '/a3' , a3, (/2,3/) )
   CALL putarrnd(fid, '/a3t', a3t,(/2,3/) )
   CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
   CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
   CALL putarrnd(fid, '/c4' , c4, (/2,4/) )
   CALL putarrnd(fid, '/c4t', c4t,(/2,4/) )
 
 !   Clean up and quit
   DEALLOCATE(a3, a3t)
   DEALLOCATE(b4, b4t)
   DEALLOCATE(c4, c4t)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/pputils2/ex6.f90 b/pputils2/ex6.f90
index defba7c..432c083 100644
--- a/pputils2/ex6.f90
+++ b/pputils2/ex6.f90
@@ -1,270 +1,270 @@
 !>
 !> @file ex6.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of matrices partitionned on a 2d proc grid:
 !     - A(n1/P1,n2,n3/P2)    -> AT(n1/P1,n3,n2/P2)
 !     - B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2)
 !     - C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2)
 !     - D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2)
 !   n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES
 !
   USE pputils2
   USE futils
   IMPLICIT NONE
   INCLUDE "mpif.h"
   CHARACTER(len=32) :: file='ex4.h5'
   INTEGER :: fid
 !
   INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
   INTEGER :: ierr, me, npes
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart, cartcol, cartrow
 !
   INTEGER ::  n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
   DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t, d4, d4t
   DOUBLE PRECISION :: x
   INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
   CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
 !
 !   Create cartesian topololy
   dims    = (/4, 3/)
   periods = (/.FALSE., .TRUE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
      END IF
   END IF
   CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
   CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
   CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
 !
 !   Define local array A3
   CALL dist1d(cartcol, 0, n1, s1, n1p)
   CALL dist1d(cartrow, 0, n2, s2, n2p)
   CALL dist1d(cartrow, 0, n3, s3, n3p)
   ALLOCATE( a3(n1p,n2,n3p), a3t(n1p,n3,n2p) )
   a3 = 0
   a3t = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            a3(i,j,k) = 10000*iglob + 100*j + kglob
         END DO
      END DO
   END DO
   IF( me .EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3
   END IF
 !
 !   Tranpose A(n1/P1,n2,n3/P2) -> AT(n1/P1,n3,n2/P2)
   CALL pptransp(cartrow, a3, a3t, 2, 3)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check A3T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3
            x = 10000*iglob + 100*jglob + k
            IF( x .NE. a3t(i,k,j) ) kerrors = kerrors+1
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
 !
 !   Define local array B4
   CALL dist1d(cartcol, 0, n2, s2, n2p)
   CALL dist1d(cartcol, 0, n3, s3, n3p)
   CALL dist1d(cartrow, 0, n4, s4, n4p)
   ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n1,n3,n2p,n4p) )
   b4 = 0
   b4t = 0
   DO i=1,n1
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4p
               lglob = s4 + l
               b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me .EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
   END IF
 !
 !   Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2)
   CALL pptransp(cartcol, b4, b4t, 2, 3)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check B4T
   kerrors = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3
            DO l=1,n4p
               lglob = s4 + l
               x = 1000000*i + 10000*jglob + 100*k + lglob
               IF( x .NE. b4t(i,k,j,l) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
 !
 !   Define local array C4
   CALL dist1d(cartrow, 0, n2, s2, n2p)
   CALL dist1d(cartcol, 0, n3, s3, n3p)
   CALL dist1d(cartrow, 0, n4, s4, n4p)
   ALLOCATE( c4(n1,n2,n3p,n4p), c4t(n1,n4,n3p,n2p) )
   c4 = 0
   c4t = 0
   DO i=1,n1
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4p
               lglob = s4 + l
               c4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4
   END IF
 !
 !   Tranpose C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2)
   CALL pptransp(cartrow, c4, c4t, 2, 4)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check C4T
   kerrors = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4
               x = 1000000*i + 10000*jglob + 100*kglob + l
               IF( x .NE. c4t(i,l,k,j) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors
 !
 !   Define local array D4
   CALL dist1d(cartcol, 0, n2, s2, n2p)
   CALL dist1d(cartrow, 0, n3, s3, n3p)
   CALL dist1d(cartrow, 0, n4, s4, n4p)
   ALLOCATE( d4(n1,n2p,n3,n4p), d4t(n1,n2p,n4,n3p) )
   d4 = 0
   d4t = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3
            DO l=1,n4p
               lglob = s4 + l
               d4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob
            END DO
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a,4i4)') 'Global dimension of matrix d', n1, n2, n3, n4
   END IF
 !
 !   Tranpose D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2)
   CALL pptransp(cartrow, d4, d4t, 3, 4)
 !
   CALL mpi_barrier(MPI_COMM_WORLD, ierr)
 !
 !   Check D4T
   kerrors = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3p
            kglob = s3 + k
            DO l=1,n4
               x = 1000000*i + 10000*jglob + 100*kglob + l
               IF( x .NE. d4t(i,j,l,k) ) kerrors = kerrors+1
            END DO
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          MPI_COMM_WORLD, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking d4', nerrors
 !
 !   Write to file
 !
   CALL creatf(file, fid, mpicomm=cart)
   CALL putarrnd(fid, '/a3' , a3, (/1,3/) )
   CALL putarrnd(fid, '/a3t', a3t,(/1,3/) )
   CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
   CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
   CALL putarrnd(fid, '/c4' , c4, (/3,4/) )
   CALL putarrnd(fid, '/c4t', c4t,(/3,4/) )
   CALL putarrnd(fid, '/d4' , d4, (/2,4/) )
   CALL putarrnd(fid, '/d4t', d4t,(/2,4/) )
 !
 !   Clean up and quit
   DEALLOCATE(a3, a3t)
   DEALLOCATE(b4, b4t)
   DEALLOCATE(c4, c4t)
   DEALLOCATE(d4, d4t)
   CALL closef(fid)
   CALL mpi_finalize(ierr)
 END PROGRAM main
diff --git a/pputils2/ex7.f90 b/pputils2/ex7.f90
index 0247599..7a8bb6f 100644
--- a/pputils2/ex7.f90
+++ b/pputils2/ex7.f90
@@ -1,160 +1,160 @@
 !>
 !> @file ex7.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 PROGRAM main
 !
 !   Tranpsose of 3d matrices partitionned in 1 and 2 proc grid:
 !     - A(n1,n2,n3/P)      -> AT(n3,n2,n1/P)
 !     - B(n1,n2/P1,n3/P2)  -> BT(n3,n2/P1,n1/P2)
 !   n1, n2, n3 NOT REQUIRED to be divided evenly by P
 !
 USE pputils2
   IMPLICIT NONE
   INCLUDE "mpif.h"
   INTEGER :: ierr, me, npes, comm=MPI_COMM_WORLD
   INTEGER ::  n1=15, n2=10, n3=20, n1p, n2p, n3p, s1, s2,s3
   DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
   DOUBLE PRECISION :: x
   INTEGER :: i, j, k, iglob, jglob, kglob, kerrors, nerrors
 !
   INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
   INTEGER, DIMENSION(ndims) :: dims, coords
   LOGICAL :: periods(ndims), reorder
   INTEGER :: cart, cartcol, cartrow
 !================================================================================
 !
 !   Init MPI
   CALL mpi_init(ierr)
   CALL mpi_comm_size(comm, npes, ierr)
   CALL mpi_comm_rank(comm, me, ierr)
 !
 !--------------------------------------------------------------------------------
 !
 ! 1D partition:
 !
 !   Define local array A3
   CALL dist1d(comm, 0, n1, s1, n1p)
   CALL dist1d(comm, 0, n3, s3, n3p)
   ALLOCATE( a3(n1,n2,n3p), a3t(n3,n2,n1p) )
   a3 = 0
   a3t = 0
   DO i=1,n1
      DO j=1,n2
         DO k=1,n3p
            kglob = s3 + k
            a3(i,j,k) = 10000*i + 100*j + kglob
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a)') '*** 1D partition ***'
      WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3
   END IF
 !
 !   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
   CALL pptransp(comm, a3, a3t, 1, 3)
 !
 !   Check A3T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2
         DO k=1,n3
            x = 10000*iglob + 100*j + k
            IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          comm, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
   DEALLOCATE(a3, a3t)
 !--------------------------------------------------------------------------------
 !
 ! 2D partition:
 !
 !   Create cartesian topololy
   dims    = (/2, 3/)
   periods = (/.FALSE., .FALSE./)
   reorder = .FALSE.
   IF( PRODUCT(dims) .NE. npes ) THEN
      IF( me .EQ. 0 ) THEN
         PRINT*,  PRODUCT(dims), " processors required!"
         CALL mpi_abort(comm, -1, ierr)
      END IF
   END IF
   CALL mpi_barrier(comm, ierr)
 !
   CALL mpi_cart_create(comm, ndims, dims, periods, reorder, cart, ierr)
   CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
   CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
   CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
 !
 !   Define local array A3
   CALL dist1d(cartrow, 0, n1, s1, n1p)
   CALL dist1d(cartcol, 0, n2, s2, n2p)
   CALL dist1d(cartrow, 0, n3, s3, n3p)
   ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) )
   a3 = 0
   a3t = 0
   DO i=1,n1
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3p
            kglob = s3 + k
            a3(i,j,k) = 10000*i + 100*jglob + kglob
         END DO
      END DO
   END DO
   IF( me.EQ. 0 ) THEN
      WRITE(*,'(a)') '*** 2D partition ***'
      WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3
   END IF
 !
 !   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
   CALL pptransp(cartrow, a3, a3t, 1, 3)
 !
 !   Check A3T
   kerrors = 0
   DO i=1,n1p
      iglob = s1 + i
      DO j=1,n2p
         jglob = s2 + j
         DO k=1,n3
            x = 10000*iglob + 100*jglob + k
            IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
         END DO
      END DO
   END DO
   CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
        &          comm, ierr)
   IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
   DEALLOCATE(a3, a3t)
 !--------------------------------------------------------------------------------
 ! Epilogue
 !
   CALL mpi_finalize(ierr)
 END PROGRAM main
 
diff --git a/pputils2/pptransp2.tpl b/pputils2/pptransp2.tpl
index ada1f71..bffdc1d 100644
--- a/pputils2/pptransp2.tpl
+++ b/pputils2/pptransp2.tpl
@@ -1,89 +1,89 @@
 !>
 !> @file pptransp2.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
  !
     INTEGER :: me, npes, i, j, istr, iend, ierr
     INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets
     INTEGER :: dims(lastdim), np(2), npmx(2)
     INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount
     INTEGER :: status(MPI_STATUS_SIZE)
 !----------------------------------------------------------------------
 !              0.   Prologue
 !
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_comm_size(comm, npes, ierr)
 !
 !  Determine send/receive proc. id
     ALLOCATE(ids(npes), idr(npes))
     CALL partners(comm, ids, idr)
 !----------------------------------------------------------------------
 !              1.   Send/receive buffers
 !
 !  Distribution of first and last partitionned dimensions
     ALLOCATE(ndists(2,npes))
     ALLOCATE(offsets(2,0:npes))
     np(1) = SIZE(b,lastdim)  ! Local first
     np(2) = SIZE(a,lastdim)  ! and last dimension
     CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr)
     offsets = 0
     DO i=1,npes
        offsets(:,i) = offsets(:,i-1) + ndists(:,i)
     END DO
 !
 !  Allocate send and receive 1d buffers
     npmx = MAXVAL(ndists,2)
     bufsiz = npmx(1)*npmx(2)   ! Maximum size of send/receive buffers
     DO i=2,lastdim-1
        bufsiz = bufsiz * SIZE(a,i)
     END DO
     ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) )
 !----------------------------------------------------------------------
 !              2.   Exchange blocks
 !
     DO i=1,npes
        istr = offsets(1,ids(i)) + 1  ! Partition a along first dim
        iend = offsets(1,ids(i)+1)
        dims = SHAPE(a)
        dims(1) = iend-istr+1
        scount = PRODUCT(dims)
        s_buf(1:scount) = RESHAPE(a(istr:iend,:), (/scount/)) !*** dim dependant ***!
        CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,&
             & r_buf, bufsiz, mpitype, idr(i), i,&
             & comm, status, ierr)
        istr = offsets(2,idr(i)) + 1   ! Partition b along first dim
        iend = offsets(2,idr(i)+1)
        dims = SHAPE(b)
        dims(1) = iend-istr+1
        b(istr:iend,:) = RESHAPE(r_buf, dims, order=(/lastdim, 1/)) !*** dim dependant ***!
    END DO
 !----------------------------------------------------------------------
 !              9.   Epilogue
 !
     DEALLOCATE(ids, idr)
     DEALLOCATE(ndists, offsets)
     DEALLOCATE(s_buf, r_buf)
 !
diff --git a/pputils2/pptransp3.tpl b/pputils2/pptransp3.tpl
index ce817af..320d444 100644
--- a/pputils2/pptransp3.tpl
+++ b/pputils2/pptransp3.tpl
@@ -1,113 +1,113 @@
 !>
 !> @file pptransp3.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
     INTEGER :: me, npes, i, j, istr, iend, ierr
     INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets
     INTEGER :: dims(lastdim), np(2), npmx(2)
     INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount
     INTEGER :: status(MPI_STATUS_SIZE)
 !----------------------------------------------------------------------
 !              0.   Prologue
 !
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_comm_size(comm, npes, ierr)
 !
 !  Determine send/receive proc. id
     ALLOCATE(ids(npes), idr(npes))
     CALL partners(comm, ids, idr)
 !----------------------------------------------------------------------
 !              1.   Send/receive buffers
 !
 !  Distribution of dim1 and dim2 partitionned dimensions
     ALLOCATE(ndists(2,npes))
     ALLOCATE(offsets(2,0:npes))
     np(1) = SIZE(b, dim2)  ! Local first
     np(2) = SIZE(a, dim2)  ! and second dimension
     CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr)
     offsets = 0
     DO i=1,npes
        offsets(:,i) = offsets(:,i-1) + ndists(:,i)
     END DO
 !
 !  Allocate send and receive 1d buffers
     npmx = MAXVAL(ndists,2)
     bufsiz = npmx(1)*npmx(2)   ! Maximum size of send/receive buffers
     DO i=1,lastdim
        IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i)
     END DO
     ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) )
 !----------------------------------------------------------------------
 !              2.   Exchange blocks
 !
     IF ( (dim1 .EQ. 1) .AND. ( dim2 .EQ. 2 ) ) THEN      !*** dim dependant ***!
        recv_order = (/2,1,3/)                            !*** dim dependant ***!           
     ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
        recv_order = (/3,2,1/)                            !*** dim dependant ***!
     ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
        recv_order = (/1,3,2/)                            !*** dim dependant ***!
     ELSE
        IF ( me .EQ. 0 ) THEN
           WRITE(*, '(a,i4,a,i4,a)') 'pptransp3: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!'
           CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
        END IF
     END IF
 !
     DO i=1,npes
        istr = offsets(1,ids(i)) + 1  ! Partition a along dimension dim1
        iend = offsets(1,ids(i)+1)
        dims = SHAPE(a)
        dims(dim1) = iend-istr+1
        scount = PRODUCT(dims)
 
        IF (dim1 .EQ. 1) THEN                                      !*** dim dependant ***!
           s_buf(1:scount) = RESHAPE(a(istr:iend,:,:), (/scount/)) !*** dim dependant ***!
        ELSE IF (dim1 .EQ. 2) THEN                                 !*** dim dependant ***!
           s_buf(1:scount) = RESHAPE(a(:,istr:iend,:), (/scount/)) !*** dim dependant ***!
        END IF                                                     !*** dim dependant ***!
 
        CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,&
             & r_buf, bufsiz, mpitype, idr(i), i,&
             & comm, status, ierr)
        istr = offsets(2,idr(i)) + 1   ! Partition b along dimension dim1
        iend = offsets(2,idr(i)+1)
        dims = SHAPE(b)
        dims(dim1) = iend-istr+1
 
        IF (dim1 .EQ. 1) THEN                                         !*** dim dependant ***!
           b(istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order)  !*** dim dependant ***!
        ELSE IF (dim1 .EQ. 2) THEN                                    !*** dim dependant ***!
           b(:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order)  !*** dim dependant ***!
        END IF                                                        !*** dim dependant ***!
 
    END DO
 !----------------------------------------------------------------------
 !              9.   Epilogue
 !
     DEALLOCATE(ids, idr)
     DEALLOCATE(ndists, offsets)
     DEALLOCATE(s_buf, r_buf)
 !
diff --git a/pputils2/pptransp4.tpl b/pputils2/pptransp4.tpl
index cec2004..780ddf7 100644
--- a/pputils2/pptransp4.tpl
+++ b/pputils2/pptransp4.tpl
@@ -1,122 +1,122 @@
 !>
 !> @file pptransp4.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
     INTEGER :: me, npes, i, j, istr, iend, ierr
     INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists
     INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets
     INTEGER :: dims(lastdim), np(2), npmx(2)
     INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount
     INTEGER :: status(MPI_STATUS_SIZE)
 !----------------------------------------------------------------------
 !              0.   Prologue
 !
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_comm_size(comm, npes, ierr)
 !
 !  Determine send/receive proc. id
     ALLOCATE(ids(npes), idr(npes))
     CALL partners(comm, ids, idr)
 !----------------------------------------------------------------------
 !              1.   Send/receive buffers
 !
 !  Distribution of dim1 and dim2 partitionned dimensions
     ALLOCATE(ndists(2,npes))
     ALLOCATE(offsets(2,0:npes))
     np(1) = SIZE(b, dim2)  ! Local first
     np(2) = SIZE(a, dim2)  ! and second dimension
     CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr)
     offsets = 0
     DO i=1,npes
        offsets(:,i) = offsets(:,i-1) + ndists(:,i)
     END DO
 !
 !  Allocate send and receive 1d buffers
     npmx = MAXVAL(ndists,2)
     bufsiz = npmx(1)*npmx(2)   ! Maximum size of send/receive buffers
     DO i=1,lastdim
        IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i)
     END DO
     ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) )
 !----------------------------------------------------------------------
 !              2.   Exchange blocks
 !
     IF ( (dim1 .EQ. 1) .AND. (dim2 .EQ. 2) ) THEN        !*** dim dependant ***!
        recv_order = (/2,1,3,4/)                          !*** dim dependant ***!
     ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
        recv_order = (/3,2,1,4/)                          !*** dim dependant ***!
     ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***!
        recv_order = (/4,2,3,1/)                          !*** dim dependant ***!
     ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
        recv_order = (/1,3,2,4/)                          !*** dim dependant ***!
     ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***!
        recv_order = (/1,4,3,2/)                          !*** dim dependant ***!
     ELSE IF ( (dim1 .EQ. 3) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***!
        recv_order = (/1,2,4,3/)                          !*** dim dependant ***!
     ELSE
        IF ( me .EQ. 0 ) THEN
           WRITE(*, '(a,i4,a,i4,a)') 'pptransp4: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!'
           CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
        END IF
     END IF
 !
     DO i=1,npes
        istr = offsets(1,ids(i)) + 1  ! Partition a along dimension dim1
        iend = offsets(1,ids(i)+1)
        dims = SHAPE(a)
        dims(dim1) = iend-istr+1
        scount = PRODUCT(dims)
 
        IF (dim1 .EQ. 1) THEN                                        !*** dim dependant ***!
           s_buf(1:scount) = RESHAPE(a(istr:iend,:,:,:), (/scount/)) !*** dim dependant ***!
        ELSE IF (dim1 .EQ. 2) THEN                                   !*** dim dependant ***!
           s_buf(1:scount) = RESHAPE(a(:,istr:iend,:,:), (/scount/)) !*** dim dependant ***!
        ELSE IF (dim1 .EQ. 3) THEN                                   !*** dim dependant ***!
           s_buf(1:scount) = RESHAPE(a(:,:,istr:iend,:), (/scount/)) !*** dim dependant ***!
        END IF                                                       !*** dim dependant ***!
 
        CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,&
             & r_buf, bufsiz, mpitype, idr(i), i,&
             & comm, status, ierr)
        istr = offsets(2,idr(i)) + 1   ! Partition b along dimension dim1
        iend = offsets(2,idr(i)+1)
        dims = SHAPE(b)
        dims(dim1) = iend-istr+1
 
        IF (dim1 .EQ. 1) THEN                                          !*** dim dependant ***!
           b(istr:iend,:,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***!
        ELSE IF (dim1 .EQ. 2) THEN                                     !*** dim dependant ***!
           b(:,istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***!
        ELSE IF (dim1 .EQ. 3) THEN                                     !*** dim dependant ***!
           b(:,:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***!
        END IF                                                         !*** dim dependant ***!
    END DO
 !----------------------------------------------------------------------
 !              9.   Epilogue
 !
     DEALLOCATE(ids, idr)
     DEALLOCATE(ndists, offsets)
     DEALLOCATE(s_buf, r_buf)
 !
diff --git a/pputils2/pputils2.f90 b/pputils2/pputils2.f90
index 348a14a..e29570b 100644
--- a/pputils2/pputils2.f90
+++ b/pputils2/pputils2.f90
@@ -1,456 +1,456 @@
 !>
 !> @file pputils2.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pputils2
 !
 !    PPUTILS2: Some MPI utilities.
 !
 !    T.M. Tran, CRPP-EPFL
 !    September 2010
 !    September 2013: add exchange, norm2
 !    November 2013: add timera, hostlist
 !
   USE iso_fortran_env, ONLY : rkind => real64
   USE mpi
   IMPLICIT NONE
   PRIVATE
   PUBLIC :: pptransp, dist1d, exchange, ppnorm2, timera, hostlist
 !
   INTERFACE pptransp
      MODULE PROCEDURE pptransp2_r, pptransp3_r, pptransp4_r
      MODULE PROCEDURE pptransp2_c, pptransp3_c, pptransp4_c
   END INTERFACE
   INTERFACE exchange
      MODULE PROCEDURE exchange_2d, exchange_2d_new
   END INTERFACE exchange
   INTERFACE ppnorm2
      MODULE PROCEDURE norm2_para_2d
   END INTERFACE ppnorm2
 !
 CONTAINS
 !=======================================================================
   SUBROUTINE pptransp2_r(comm, a, b)
 !
 ! Handles double precision-type matrices.
 !
 ! Transpose of rank 2 matrix A:
 !    A(n1,n2/P) -> B(n2,n1/P)
 !
     INTEGER, INTENT(in)                      :: comm
     REAL(rkind), DIMENSION(:,:), INTENT(in)  :: a               !*** dim dependant ***!
     REAL(rkind), DIMENSION(:,:), INTENT(out) :: b              !*** dim dependant ***!
     INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***!
     REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
 !
     INCLUDE 'pptransp2.tpl'
 !
   END SUBROUTINE pptransp2_r
 !======================================================================= 
   SUBROUTINE pptransp2_c(comm, a, b)
 !
 ! Same as pptransp2_r, but for double complex-type matrices.
 !
     INTEGER, INTENT(in)                         :: comm
     COMPLEX(rkind), DIMENSION(:,:), INTENT(in)  :: a               !*** dim dependant ***!
     COMPLEX(rkind), DIMENSION(:,:), INTENT(out) :: b              !*** dim dependant ***!
     INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***!
     COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
 !
     INCLUDE 'pptransp2.tpl'
 !
   END SUBROUTINE pptransp2_c
 !=======================================================================
   SUBROUTINE pptransp3_r(comm, a, b, dim1, dim2)
 !
 ! Handles double precision-type matrices.
 !
 ! Transpose dimensions dim1 and dim2 of rank 3 matrix A. 
 ! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 3.
 ! At input, matrix A is partitioned along dimension dim2 of matrix A.
 ! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A.
 !
 ! For example:
 !    dim1 = 1, dim2 = 2 : A(n1,n2/P,n3) -> B(n2,n1/P,n3)
 !    dim1 = 1, dim2 = 3 : A(n1,n2,n3/P) -> B(n3,n2,n1/P)
 !    dim1 = 2, dim2 = 3 : A(n1,n2,n3/P) -> B(n1,n3,n2/P)
 !
     INTEGER, INTENT(in)                        :: comm
     REAL(rkind), DIMENSION(:,:,:), INTENT(in)  :: a             !*** dim dependant ***!
     REAL(rkind), DIMENSION(:,:,:), INTENT(out) :: b            !*** dim dependant ***!
     INTEGER, INTENT(in) :: dim1, dim2
     INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***!
     REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
     INTEGER :: recv_order(lastdim)
  ! 
     INCLUDE 'pptransp3.tpl'
  ! 
   END SUBROUTINE pptransp3_r
 !=======================================================================
   SUBROUTINE pptransp3_c(comm, a, b, dim1, dim2)
 !
 ! Same as pptransp3_r, but for double complex-type matrices.
 !
     INTEGER, INTENT(in)                           :: comm
     COMPLEX(rkind), DIMENSION(:,:,:), INTENT(in)  :: a             !*** dim dependant ***!
     COMPLEX(rkind), DIMENSION(:,:,:), INTENT(out) :: b            !*** dim dependant ***!
     INTEGER, INTENT(in) :: dim1, dim2
     INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***!
     COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
     INTEGER :: recv_order(lastdim)
  ! 
     INCLUDE 'pptransp3.tpl'
  ! 
   END SUBROUTINE pptransp3_c
 !=======================================================================
   SUBROUTINE pptransp4_r(comm, a, b, dim1, dim2)
 !
 ! Handles double precision-type matrices.
 !
 ! Transpose dimensions dim1 and dim2 of rank 4 matrix A. 
 ! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 4.
 ! At input, matrix A is partitioned along dimension dim2 of matrix A.
 ! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A.
 !
 ! For example:
 !    dim1 = 1, dim2 = 2 : A(n1,n2/P,n3  ,n4  ) -> B(n2,n1/P,n3  ,n4  )
 !    dim1 = 1, dim2 = 3 : A(n1,n2  ,n3/P,n4  ) -> B(n3,n2  ,n1/P,n4  )
 !    dim1 = 1, dim2 = 4 : A(n1,n2  ,n3  ,n4/P) -> B(n4,n2  ,n3  ,n1/P)
 !    dim1 = 2, dim2 = 3 : A(n1,n2  ,n3/P,n4  ) -> B(n1,n3  ,n2/P,n4  )
 !    dim1 = 2, dim2 = 4 : A(n1,n2  ,n3  ,n4/P) -> B(n1,n4  ,n3  ,n2/P)
 !    dim1 = 3, dim2 = 4 : A(n1,n2  ,n3  ,n4/P) -> B(n1,n2  ,n4  ,n3/P)
 !
     INTEGER, INTENT(in)                          :: comm
     REAL(rkind), DIMENSION(:,:,:,:), INTENT(in ) :: a           !*** dim dependant ***!
     REAL(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b          !*** dim dependant ***!
     INTEGER, INTENT(in) :: dim1, dim2
     INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***!
     REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
     INTEGER :: recv_order(lastdim)
 !
     INCLUDE 'pptransp4.tpl'
 !
   END SUBROUTINE pptransp4_r
 !=======================================================================
   SUBROUTINE pptransp4_c(comm, a, b, dim1, dim2)
 !
 ! Same as pptransp4_r, but for double complex-type matrices
 !
     INTEGER, INTENT(in)                             :: comm
     COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(in)  :: a           !*** dim dependant ***!
     COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b          !*** dim dependant ***!
     INTEGER, INTENT(in)                             :: dim1, dim2
     INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***!
     COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
     INTEGER :: recv_order(lastdim)
 !
     INCLUDE 'pptransp4.tpl'
 !
   END SUBROUTINE pptransp4_c
 !=======================================================================
   SUBROUTINE dist1d(comm, s0, ntot, s, nloc)
 !
 !  1d distribute ntot elements, returns offset s and local number of 
 !  elements nloc.
 ! 
     INTEGER, INTENT(in)  :: s0, ntot
     INTEGER, INTENT(out) :: s, nloc
     INTEGER :: comm, me, npes, ierr, naver, rem
 !
     CALL MPI_COMM_SIZE(comm, npes, ierr)
     CALL MPI_COMM_RANK(comm, me, ierr)
     naver = ntot/npes
     rem = MODULO(ntot,npes)
     s = s0 + MIN(rem,me) + me*naver
     nloc = naver
     IF( me.LT.rem ) nloc = nloc+1
 !
   END SUBROUTINE dist1d
 !=======================================================================
   SUBROUTINE exchange_2d_new(comm, u, garea)
 !
 !  Exhange ghost cells with (west,east,south,north) neighbors.
 !  Assume same ghost cells on each dimension:
 !    garea(1) : number of ghost cells on west and east boundaries
 !    garea(2) : number of ghost cells on south and north boundaries
 !  Both are equal to 1 by default.
 !
     INTEGER, INTENT(in)                     :: comm
     REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:)
     INTEGER, OPTIONAL, INTENT(in)           :: garea(2)
     INTEGER :: neighs(4), ierr
 !
     CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr)
     CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr)
     CALL exchange_2d(comm, neighs, u, garea)
   END SUBROUTINE exchange_2d_new
 !=======================================================================
   SUBROUTINE exchange_2d(comm, neighs, u, garea)
 !
 !  Exhange ghost cells with (west,east,south,north) neighbors.
 !  Assume same ghost cells on each dimension:
 !    garea(1) : number of ghost cells on west and east boundaries
 !    garea(2) : number of ghost cells on south and north boundaries
 !  Both are equal to 1 by default.
 !
     INTEGER, INTENT(in)                     :: comm
     INTEGER, INTENT(in)                     :: neighs(4)
     REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:)
     INTEGER, OPTIONAL, INTENT(in)           :: garea(2)
 !
     INTEGER                  :: cols, rows
     INTEGER                  :: ierr
     INTEGER, PARAMETER       :: ndim=2
     INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n
 !
     g = [1,1]
     IF(PRESENT(garea)) g = garea
     lb = LBOUND(u)
     ub = UBOUND(u)
     s = lb + g
     e = ub - g
     n = ub - lb + 1   ! include ghost cells
 !
 !   g(2) matrix full rows with stride n(1)
     CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr)
     CALL mpi_type_commit(rows, ierr)
 !
 !   g(1) contiguous matrix full columns
     CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION,  cols, ierr)
     CALL mpi_type_commit(cols, ierr)
 !
 !  Exchange along first dimension
     CALL mpi_sendrecv(u(s(1),  lb(2)), 1, rows, neighs(1), 0, &
          &            u(e(1)+1,lb(2)), 1, rows, neighs(2), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
     CALL mpi_sendrecv(u(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, &
          &            u(lb(1),      lb(2)), 1, rows, neighs(1), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
 !
 !  Exchange along second dimension
     CALL mpi_sendrecv(u(lb(1),s(2)),   1, cols, neighs(3), 0, &
          &            u(lb(1),e(2)+1), 1, cols, neighs(4), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
     CALL mpi_sendrecv(u(lb(1),e(2)-g(2)+1),   1, cols, neighs(4), 0, &
          &            u(lb(1),lb(2)),         1, cols, neighs(3), 0, &
          &                                  comm, MPI_STATUS_IGNORE, ierr)
   END SUBROUTINE exchange_2d
 !=======================================================================
   FUNCTION norm2_para_2d(x, comm, root, garea) RESULT(res)
 !
 !  Vector norm of 2d distributed array with ghost cells
 !
     USE mpi
     REAL(rkind), ALLOCATABLE, INTENT(in) :: x(:,:)
     INTEGER, INTENT(in)                  :: comm
     INTEGER, INTENT(in), OPTIONAL        :: root
     INTEGER, INTENT(in), OPTIONAL        :: garea(:)
     REAL(rkind)                          :: res
     INTEGER, PARAMETER       :: ndim=2
     INTEGER, DIMENSION(ndim) :: g, s, e
     REAL(rkind)              :: res_loc
     INTEGER                  :: r, me, ierr
 !
     CALL mpi_comm_rank(comm, me, ierr)
     g = [1,1]
     IF(PRESENT(garea)) g = garea
     r = 0
     IF(PRESENT(root)) r = root
     s = LBOUND(x) + g
     e = UBOUND(x) - g
     res_loc = SUM(x(s(1):e(1),s(2):e(2))**2)
     CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, r, comm, ierr)
     if(me.eq.r) res = SQRT(res)    
   END FUNCTION norm2_para_2d
 !=======================================================================
   SUBROUTINE timera(cntrl, str, eltime, comm)
 !
 !   Timers (cntrl=0/1 to Init/Update)
 !
     USE mpi
     INTEGER, INTENT(in)                     :: cntrl
     CHARACTER(len=*), INTENT(in)            :: str
     DOUBLE PRECISION, OPTIONAL, INTENT(out) :: eltime
     INTEGER, OPTIONAL, INTENT(in)           :: comm
 !
     INTEGER, PARAMETER :: ncmax=128, maxlen=32
 !
     INTEGER, SAVE                            :: icall=0, nc=0
     DOUBLE PRECISION, SAVE                   :: startt0=0.0
     DOUBLE PRECISION, DIMENSION(ncmax), SAVE :: startt = 0.0, endt = 0.0
     CHARACTER(len=maxlen), SAVE              :: which(ncmax)
 !
     DOUBLE PRECISION, DIMENSION(ncmax) :: endtmin, endtmax
     INTEGER :: comm0, me, lstr, found, i, ierr
 !________________________________________________________________________________
     IF(PRESENT(comm)) THEN
        comm0 = comm
     ELSE
        comm0 = MPI_COMM_WORLD
     END IF
     CALL mpi_comm_rank(comm0, me, ierr)
     CALL mpi_barrier(comm0, ierr)
 !________________________________________________________________________________
 !
     IF( icall .EQ. 0 ) THEN
        icall = icall+1
        startt0 = mpi_wtime()
     END IF
 
     lstr = MIN(LEN_TRIM(str),maxlen)
     IF( lstr .GT. 0 ) found = loc(str)
 !________________________________________________________________________________
 !
     SELECT CASE (cntrl)
 !
     CASE(-1)    !  Current wall time
        IF( PRESENT(eltime) ) THEN
           eltime = mpi_wtime() - startt0
        ELSE IF (me .EQ. 0 ) THEN
           WRITE(*,'(/a,a,1pe10.3/)') "++ ", ' Wall time used so far = ', &
                &                     mpi_wtime() - startt0
        END IF
 !
     CASE(0)    !  Init Timer
        IF( found .EQ. 0 ) THEN  !  Called for the 1st time for 'str'
           nc = nc+1
           which(nc) = str(1:lstr)
           found = nc
        END IF
        startt(found) = mpi_wtime()
 !
     CASE(1)   !  Update timer
        endt(found) = mpi_wtime() - startt(found)
        IF( PRESENT(eltime) ) THEN
           eltime = endt(found)
        ELSE IF (me .EQ. 0 ) THEN
           WRITE(*,'(/a,a,1pe10.3/)') "++ "//str, ' wall clock time = ', &
                &                     endt(found)
        END IF
 !
     CASE(2)   !  Update and reset timer
        endt(found) = endt(found) + mpi_wtime() - startt(found)
        startt(found) = mpi_wtime()
        IF( PRESENT(eltime) ) THEN
           eltime = endt(found)
        END IF
 !
     CASE(9)   !  Display all timers
        IF( nc .GT. 0 ) THEN
           CALL mpi_reduce(endt, endtmin, nc, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm0, ierr)
           CALL mpi_reduce(endt, endtmax, nc, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm0, ierr)
           IF( me .EQ. 0 ) THEN
              WRITE(*,'(a)') "Minmax Timer Summary"
              WRITE(*,'(a)') "===================="
              DO i=1,nc
                 WRITE(*,'(a20,2x,2(1pe12.3))') TRIM(which(i))//":", endtmin(i), endtmax(i)
              END DO
           END IF
        END IF
 !
     END SELECT
 !
   CONTAINS
     INTEGER FUNCTION loc(str)
       CHARACTER(len=*), INTENT(in) :: str
       INTEGER :: i, ind
       loc = 0
       DO i=1,nc
          ind = INDEX(which(i), str(1:lstr))
          IF( ind .GT. 0 .AND. LEN_TRIM(which(i)) .EQ. lstr ) THEN
             loc = i
             EXIT
          END IF
       END DO
     END FUNCTION loc
   END SUBROUTINE timera
 !=======================================================================
   SUBROUTINE hostlist(comm)
 !
 !   Print list of hostnames in comm
 !
     USE mpi
     INTEGER, OPTIONAL, INTENT(in) :: comm
 !
     CHARACTER(len=MPI_MAX_PROCESSOR_NAME) :: procname
     CHARACTER(len=MPI_MAX_PROCESSOR_NAME), ALLOCATABLE :: procnames(:)
     INTEGER :: comm0, me, nprocs, ierr, i, l
 !
     IF(PRESENT(comm)) THEN
        comm0 = comm
     ELSE
        comm0 = mpi_comm_world
     END IF
     CALL MPI_COMM_RANK(comm0, me, ierr)
     CALL MPI_COMM_SIZE(comm0, nprocs, ierr)
     CALL MPI_GET_PROCESSOR_NAME(procname, l, ierr)
     ALLOCATE(procnames(0:nprocs-1))
     CALL mpi_gather(procname,MPI_MAX_PROCESSOR_NAME,mpi_character, &
        &          procnames,MPI_MAX_PROCESSOR_NAME,mpi_character,0, &
        &          comm0,ierr)
     IF(me.EQ.0) THEN
        WRITE(*,'(a/(10(1x,a)))') 'Host list:', &
             &                   (TRIM(procnames(i)),i=0,nprocs-1)
     END IF
   END SUBROUTINE hostlist
 !=======================================================================
 !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !                     Private routines/functions                       !
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   SUBROUTINE partners(comm, ids, idr)
 !
 !   Compute ranks of send and receive procs.
 !
     IMPLICIT NONE
     INTEGER, INTENT(in)  :: comm
     INTEGER, INTENT(out) :: ids(:), idr(:)
     INTEGER :: me, npes, ierr, i
 !
     CALL mpi_comm_rank(comm, me, ierr)
     CALL mpi_comm_size(comm, npes, ierr)
     IF( ispower2(npes) ) THEN
        DO i=0,npes-1
           ids(i+1) = IEOR(me, i)
           idr(i+1) = ids(i+1)
       END DO
     ELSE
        DO i=0,npes-1
           ids(i+1) = MODULO(me+i, npes)
           idr(i+1) = MODULO(me-i, npes)
        END DO
     END IF
   END SUBROUTINE partners
 !======================================================================= 
   LOGICAL FUNCTION ispower2(n)
     INTEGER, INTENT(in) :: n
     INTEGER :: l
     l=2
     DO WHILE ( l .LT. n ) 
        l = 2*l
     END DO
     ispower2 = l .EQ. n    
   END FUNCTION ispower2
 !======================================================================= 
 END MODULE pputils2
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b55fdb1..5d069c5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,111 +1,111 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 project(bsplines_src)
 
 set(SRCS
   bsplines.f90
   matrix.f90
   sparse_mod.f90
   lapack_extra.f
   math_util.f90
 )
 
 set(SRCS_PP
   conmat.f90
   )
 
 set(PUBLIC_MODULES
   bsplines.mod
   matrix.mod
   math_util.mod
   conmat_mod.mod
   sparse.mod
   )
 
 if(HAS_PARDISO)
   list(APPEND ${SRCS_PP} pardiso_mod.f90)
 endif()
 
 set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DWSMP ${MKL_DEFINITIONS})
 
 if(HAS_MUMPS)
   list(APPEND SRCS
     multigrid_mod.f90
     )
 
   list(APPEND SRCS_PP
     mumps_mod.f90
     csr_mod.f90
     cds_mod.f90
   )
 
   list(APPEND PUBLIC_MODULES
     cds.mod
     csr.mod
     mumps_bsplines.mod)
 
   set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DMUMPS)
 endif()
 
 set(_public_headers)
 foreach(_modules ${PUBLIC_MODULES})
   list(APPEND _public_headers ${CMAKE_CURRENT_BINARY_DIR}/${_modules})
 endforeach()
 set_property(SOURCE ${SRCS_PP} APPEND PROPERTY COMPILE_OPTIONS -cpp)
 
 include(GNUInstallDirs)
 
 add_library(bsplines STATIC ${SRCS} ${SRCS_PP})
 target_include_directories(bsplines
   PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
           ${MUMPS_INCLUDE_DIR}
   INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
             $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
   )
 
 set_property(TARGET bsplines
   PROPERTY PUBLIC_HEADER ${_public_headers})
 
 target_link_libraries(bsplines
   PUBLIC futils pppack pputils2 fft
          ${BLAS_LIBRARIES}
          ${MUMPS_LIBRARIES}
          ${LAPACK_LIBRARIES}
   )
 
 
 if(MKL_Fortran_FLAGS)
   separate_arguments(MKL_Fortran_FLAGS)
   target_compile_options(bsplines PUBLIC ${MKL_Fortran_FLAGS})
   target_link_options(bsplines PUBLIC ${MKL_Fortran_FLAGS})
 endif()
 
 install(TARGETS bsplines
   EXPORT ${BSPLINES_EXPORT_TARGETS}
   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
   PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
diff --git a/src/Makefile b/src/Makefile
index 229a18e..edee4a5 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,176 +1,176 @@
 #
 # @file Makefile
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Stephan Brunner <stephan.brunner@epfl.ch>
 # @author Sébastien Jolliet <sebastien.jolliet@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 PREFIX=/usr/local/crpp
 FUTILS=$(PREFIX)/futils
 PPPACK=../pppack
 PPUTILS2=../pputils2
 
 MPIF90 = mpif90
 F90 = mpif90
 LD = $(MPIF90)
 
 debug = -g -traceback -check bounds -fpe0 -warn alignments -warn unused
 debug = -g -traceback -check bounds -fpe0 -warn alignments
 optim = -O3 -xHOST
 
 #OPT=$(debug)
 OPT=$(optim)
 
 F90FLAGS = $(OPT) -fPIC -I. -I$(FUTILS)/include
 
 CC = cc
 CFLAGS = -O2
 
 SPL_OBJS = bsplines.o matrix.o sparse_mod.o pardiso_mod.o \
            lapack_extra.o conmat.o math_util.o
 
 ifdef MKL
 SPBLAS = -DMKL
 endif
 
 ifdef MUMPS
 SPL_OBJS += mumps_mod.o csr_mod.o cds_mod.o multigrid_mod.o
 F90FLAGS += -I$(MUMPS)/include
 endif
 
 ifdef WSMP
 SPL_OBJS += wsmp_mod.o pwsmp_mod.o
 endif
 
 ifdef PETSC_DIR
 SPL_OBJS += petsc_mod.o
 FCCPFLAGS = -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include
 endif
 
 .SUFFIXES:
 .SUFFIXES: .o .c .f90 .f .F90
 
 .f90.o:
 	$(MPIF90) $(F90FLAGS) -c $<
 .F90.o:
 	$(MPIF90) $(F90FLAGS) $(FCCPFLAGS) -c $<
 .f.o:
 	$(F90) $(F90FLAGS) -c $<
 
 SUBDIRS = pputils2 pppack fft
 subdirs: $(SUBDIRS)
 .PHONY: subdirs $(SUBSDIRS) $(PPUTILS2)
 
 $(SUBDIRS):
 	$(MAKE) "OPT=$(OPT)" -C ../$@ lib
 
 lib:	subdirs libbsplines.a
 	cp -p $(PPPACK)/libpppack.a ./
 	touch lib
 	cp -p lib ../examples
 
 libbsplines.a: $(SPL_OBJS)
 	xiar r $@ $?
 	ranlib $@
 
 debug:
 	make clean
 	make "OPT=$(debug)" lib
 	mkdir -p .g
 	cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .g/
 
 opt:
 	make clean
 	make "OPT=$(optim)" lib
 	mkdir -p $(PREFIX)/{lib,include}/O
 	mkdir -p .O
 	cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .O/
 
 install: debug opt
 	mkdir -p $(PREFIX)/{lib,include}/g
 	mv .g/*.a $(PREFIX)/lib/g/
 	mv .g/*.mod $(PREFIX)/include/g/
 	mkdir -p $(PREFIX)/{lib,include}/O
 	mv .O/*.a $(PREFIX)/lib/O/
 	mv .O/*.mod $(PREFIX)/include/O/
 
 
 uninstall:
 	rm -f $(PREFIX)/include/{O,g}/bsplines.mod \
               $(PREFIX)/include/{O,g}/cds.mod \
               $(PREFIX)/include/{O,g}/conmat_mod.mod \
               $(PREFIX)/include/{O,g}/csr.mod \
               $(PREFIX)/include/{O,g}/math_util.mod \
               $(PREFIX)/include/{O,g}/matrix.mod \
               $(PREFIX)/include/{O,g}/multigrid.mod \
               $(PREFIX)/include/{O,g}/mumps_bsplines.mod \
               $(PREFIX)/include/{O,g}/pardiso_bsplines.mod \
               $(PREFIX)/include/{O,g}/petsc_bsplines.mod \
               $(PREFIX)/include/{O,g}/sparse.mod \
               $(PREFIX)/include/{O,g}/wsmp_bsplines.mod \
               $(PREFIX)/lib/{O,g}/libbsplines.a \
               $(PREFIX)/lib/{O,g}/libpppack.a
 
 matrix.o: matrix.f90
 sparse_mod.o: sparse_mod.f90
 bsplines.o: bsplines.f90 matrix.o
 multigrid_mod.o: bsplines.o matrix.o conmat.o csr_mod.o cds_mod.o
 
 conmat.o: conmat.f90 conmat.tpl conmat_1d.tpl zconmat.tpl zconmat_1d.tpl conrhs.tpl
 	    $(F90) -fpp -DMKL -DWSMP -DMUMPS $(F90FLAGS) -c conmat.f90
 
 cds_mod.o: cds_mod.f90
 	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c cds_mod.f90
 
 pardiso_mod.o: pardiso_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl
 	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c pardiso_mod.f90
 
 mumps_mod.o:mumps_mod.f90  sparse_mod.o psum_mat.tpl p2p_mat.tpl
 	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c mumps_mod.f90
 
 wsmp_mod.o: wsmp_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl
 	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c wsmp_mod.f90
 
 pwsmp_mod.o: pwsmp_mod.f90 sparse_mod.o wsmp_mod.o psum_mat.tpl p2p_mat.tpl
 	$(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c pwsmp_mod.f90
 
 petsc_mod.o: petsc_mod.F90 sparse_mod.o
 	$(F90) -fpp $(FCCPFLAGS) $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c petsc_mod.F90
 
 csr_mod.o: csr_mod.f90 sparse_mod.o mumps_mod.o
 	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c csr_mod.f90
 
 tags:
 	etags *.f *.f90 $(PPPACK)/*.f90
 
 clean:
 	$(MAKE) -C $(PPPACK) clean
 	$(MAKE) -C ../fft clean
 	rm -f *.o *.mod *~ a.out
 
 distclean: clean
 	$(MAKE) -C $(PPPACK) distclean
 	$(MAKE) -C ../fft distclean
 	$(MAKE) -C $(PPUTILS2) distclean
 	rm -f lib *.a *.mod ../bin/*
 	rm -rf .O .g
diff --git a/src/bsplines.f90 b/src/bsplines.f90
index 1feff79..0ab9ca8 100644
--- a/src/bsplines.f90
+++ b/src/bsplines.f90
@@ -1,4285 +1,4285 @@
 !>
 !> @file bsplines.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Stephan Brunner <stephan.brunner@epfl.ch>
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE bsplines
 !
 !    BSPLINES: A module to construct B-Splines of any order on
 !              non-equidistant mesh. Can be used for interpolation and
 !              Finite Element discretization.
 !
 !    T.M. Tran, S. Brunner, CRPP-EPFL
 !    February 2007
 !
   USE matrix
   IMPLICIT NONE
   PRIVATE
   PUBLIC :: spline1d, set_spline, get_dim, get_gauss, gridval
   PUBLIC :: spline2d, spline2d1d, def_knots, allsplines
   PUBLIC :: set_splcoef, get_splcoef
   PUBLIC :: fintg, calc_integ, destroy_sp
   PUBLIC :: gauleg, CompMassMatrix
   PUBLIC :: basfun_recur, basfun, def_basfun, is_equid, locintv_old, locintv
   PUBLIC :: calc_fftmass, calc_fftmass_old
   PUBLIC :: init_dft, ft_basfun
   PUBLIC :: getgrad
   PUBLIC :: dftmap
 !
   DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
 !
   TYPE dftmap
      INTEGER :: n                                    ! Total number of modes
      INTEGER :: kmin, kmax                           ! Define the Foyrier window
      INTEGER :: dk                                   ! Number of modes in window
      DOUBLE PRECISION :: dx                          ! Interval in real space
      INTEGER, POINTER :: mode_couplings(:) => NULL() ! Table of mode couplings
      DOUBLE COMPLEX, POINTER :: coefs(:,:) => NULL() ! The restricted Fourier coefs
   END TYPE dftmap
 !
   TYPE spline1d
      INTEGER :: order                 ! Spline order = spline degree + 1
      INTEGER :: nints                 ! Number of knot intervals
      INTEGER :: nsites                ! Number of interpolation sites
      INTEGER :: dim                   ! Dimension of spline space
      INTEGER :: left=0                ! Save value used by routine LOCATE
      LOGICAL :: period                ! is the grid periodic ?
      LOGICAL :: nlequid               ! is the grid equidistant ?
      LOGICAL :: nlppform              ! Construct and build PPFORM in GRIDVAL if .TRUE.
      DOUBLE PRECISION :: lperiod      ! Periodicity
      DOUBLE PRECISION :: hinv         ! Inverse of mesh size, when nlequid=T
      INTEGER, POINTER ::&
           & fmap(:) => NULL()         ! Mapping of fine to coarse mesh
      DOUBLE PRECISION, POINTER :: &
           & knots(:) => NULL(), &     ! Spline knots (-p:n+p)
           & val0(:,:,:) => NULL(), &  ! Values and deriv. at left boundary of all splines 
           & valc(:,:) => NULL(), &    ! Values and deriv. at left boundary of equid. per. splines 
           & gausx(:,:) => NULL(), &   ! Gauss abscissas
           & gausw(:,:) => NULL(), &   ! Gauss weights
           & intspl(:) => NULL(), &    ! Integral of splines
           & ppform(:,:) => NULL(), &  ! PPFORM coefs
           & bcoefs(:) => NULL()       ! Spline coefs
      DOUBLE COMPLEX, POINTER :: &
           & ppformz(:,:) => NULL(), & ! PPFORM coefs for complex function
           & bcoefsc(:) => NULL()      ! Spline coefs for complex function
      TYPE(GBMAT) :: mat               ! Interpolation matrix
      TYPE(periodic_mat) :: matp       ! Interpolation matrix (periodic case)
      TYPE(dftmap) :: dft                ! Define DFT mapping
   END TYPE spline1d
 !
   TYPE spline2d
      TYPE(spline1d) :: sp1                                  ! Spline in direction 1
      TYPE(spline1d) :: sp2                                  ! Spline in direction 2
      DOUBLE PRECISION, POINTER :: ppform(:,:,:,:) => NULL() ! 2d PPFORM coefs
      DOUBLE PRECISION, POINTER :: bcoefs(:,:) => NULL()     ! Spline coefs
      DOUBLE COMPLEX, POINTER :: ppformz(:,:,:,:) => NULL()  ! PPFORM coefs for complex function
      DOUBLE COMPLEX, POINTER :: bcoefsc(:,:) => NULL()      ! Spline coefs for complex function
   END TYPE spline2d
 !
   TYPE spline2d1d
      TYPE(spline2d) :: sp12  ! 2D spline for dir. 1 and 2
      TYPE(spline1d) :: sp3   ! 1D spline for dir. 3
      DOUBLE PRECISION, POINTER :: ppform(:,:,:,:,:,:) => NULL() ! PPFORM coefs
      DOUBLE PRECISION, POINTER :: bcoefs(:,:,:) => NULL()       ! Spline coefs
      DOUBLE PRECISION, POINTER :: ppformz(:,:,:,:,:,:) => NULL()! PPFORM coefs for complex function
      DOUBLE COMPLEX, POINTER :: bcoefsc(:,:,:) => NULL()        ! Spline coefs for complex function
   END TYPE spline2d1d
 !
   INTERFACE set_spline
      MODULE PROCEDURE set_spline1d, set_spline2d, set_spline2d1d
   END INTERFACE
   INTERFACE get_dim
      MODULE PROCEDURE get_dim1, get_dim2
   END INTERFACE
   INTERFACE gridval
      MODULE PROCEDURE gridval1d, gridval1dz, &
           &           gridval2d, gridval2dz, &
           &           gridval2d_1d, gridval2d_1dz, &
           &           gridval2d_2d, gridval2d_2dz, &
           &           gridval2d1d_3d, gridval2d1d_1d 
   END INTERFACE
   INTERFACE set_splcoef
      MODULE PROCEDURE set_splcoef1d, set_splcoef2d
   END INTERFACE
   INTERFACE get_splcoef
      MODULE PROCEDURE get_splcoef1, get_splcoef1z, get_splcoefn, &
           &           get_splcoef2d, get_splcoef2dz
   END INTERFACE
   INTERFACE fintg
      MODULE PROCEDURE fintg1, fintg2
   END INTERFACE
   INTERFACE destroy_sp
      MODULE PROCEDURE destroy_sp1d, destroy_sp2d, destroy_sp2d1d
   END INTERFACE
   INTERFACE CompMassMatrix
      MODULE PROCEDURE CompMassMatrix1, CompMassMatrix_gb, CompMassMatrix_zgb
   END INTERFACE
   INTERFACE calc_integ
      MODULE PROCEDURE calc_integ0,calc_integn 
   END INTERFACE
   INTERFACE locintv
      MODULE PROCEDURE locintv0, locintv1
   END INTERFACE locintv
   INTERFACE locintv_old
      MODULE PROCEDURE locintv0_old, locintv1_old
   END INTERFACE locintv_old
   INTERFACE ppval
      MODULE PROCEDURE ppval0,  ppval1,  ppval2, &
           &           ppval0_n, &
           &           ppval0z, ppval1z, ppval2z, &
           &           ppval0z_n
   END INTERFACE ppval
   INTERFACE basfun
      MODULE PROCEDURE basfun0, basfun1
   END INTERFACE basfun
   INTERFACE ft_basfun
      MODULE PROCEDURE ft_basfun0, ft_basfun1
   END INTERFACE ft_basfun
   INTERFACE def_basfun
      MODULE PROCEDURE def_basfun0, def_basfun1
   END INTERFACE def_basfun
   INTERFACE getgrad
      MODULE PROCEDURE getgradr, getgradz
   END INTERFACE getgrad
 !
 CONTAINS
 !===========================================================================
   SUBROUTINE set_spline1d(p, ngauss, grid, sp, period, nlppform, nlequid)
 !
 !  Setup a spline
 !
     INTEGER, INTENT(in) :: p, ngauss
     DOUBLE PRECISION, INTENT(in)  :: grid(:)
     TYPE(spline1d), INTENT(out)   :: sp
     LOGICAL, OPTIONAL, INTENT(in) :: period
     LOGICAL, OPTIONAL, INTENT(in) :: nlppform
     LOGICAL, OPTIONAL, INTENT(in) :: nlequid
 !
     DOUBLE COMPLEX :: zc
     DOUBLE PRECISION :: leng, xp, factinv, h
     INTEGER :: order, nints, i, k
     DOUBLE PRECISION :: temp(1:p+1,0:p)
 !
 !  Order of splines
     order = p+1
     sp%order = order
 !
 !  Dimension of spline space
     nints = SIZE(grid)-1
     sp%nints = nints
     sp%period = .FALSE.
     IF( PRESENT(period) ) THEN 
        sp%period = period
        sp%lperiod = grid(nints+1) - grid(1)
     END IF
     sp%dim = nints+p
 !
 !  Use or not PPFORM
     sp%nlppform = .TRUE.
     IF( PRESENT(nlppform) ) THEN
        sp%nlppform = nlppform
     END IF
 !
 !  Determine sequence of knots
     IF( ASSOCIATED(sp%knots) ) DEALLOCATE(sp%knots)
     ALLOCATE( sp%knots(-p:nints+p) )
     sp%knots(0:nints) = grid(:)
 !
 !  Is the grid equidistant ?
     IF( PRESENT(nlequid) ) THEN
        sp%nlequid = nlequid
     ELSE
        sp%nlequid = is_equid(grid)
     END IF
 !
 !  Coarse to fine mesh mapping for non-equidistant mesh
     IF(sp%nlequid) THEN
        sp%hinv = 1.0d0/(sp%knots(1)-sp%knots(0))
     ELSE
        IF(ASSOCIATED(sp%fmap)) DEALLOCATE(sp%fmap)
        CALL create_fine(sp%knots(0:nints), h, sp%fmap)
        sp%hinv = 1.0d0/h
     END IF
 !
 !  Extend knots at both sides of given grid points
     IF( sp%period ) THEN
        leng = sp%knots(nints) - sp%knots(0)
        DO i=-1,-p,-1
           sp%knots(i) = sp%knots(nints+i) - leng
        END DO
        DO i=1,p
           sp%knots(nints+i) = sp%knots(i) + leng
        END DO
 !!$       sp%knots(-p:-1) = sp%knots(nints-p:nints-1) - leng
 !!$       sp%knots(nints+1:nints+p) = leng + sp%knots(1:p)
     ELSE
        sp%knots(-p:-1) = sp%knots(0)
        sp%knots(nints+1:nints+p) = sp%knots(nints)
     END IF
 !
 !  Precalculated values of all splines and their derivatives at left boundaries 
     IF( ASSOCIATED(sp%val0) ) DEALLOCATE(sp%val0)
     ALLOCATE( sp%val0(0:p, p+1, 1:nints) )
     sp%val0 = 0.0d0
     DO i=1,nints
        xp = sp%knots(i-1) + EPSILON(1.0d0)*ABS(sp%knots(i-1))
        CALL basfun_recur(xp, sp, temp, i)
        sp%val0(:,:,i) = TRANSPOSE(temp)
     END DO
 !
     factinv = 1.0d0
     DO k=2,p   !   Divide by k! for use in PPFORM_ALT
        factinv = factinv/k
        sp%val0(k,:,:) = sp%val0(k,:,:)*factinv
     END DO
 !
 ! Case of periodic equidistant splines (translational invariance)
     IF(sp%period .AND. sp%nlequid) THEN
        IF( ASSOCIATED(sp%valc) ) DEALLOCATE(sp%valc)
        ALLOCATE(sp%valc(0:p, p+1))
        sp%valc = sp%val0(:,:,1)
     END IF
 !
 !  Gauss abscissas and weights
     IF( ngauss .GT. 0 ) THEN
        IF( ASSOCIATED(sp%gausx) ) DEALLOCATE(sp%gausx)
        IF( ASSOCIATED(sp%gausw) ) DEALLOCATE(sp%gausw)
        ALLOCATE(sp%gausx(ngauss,nints))
        ALLOCATE(sp%gausw(ngauss,nints))
        DO i=1,nints
           CALL gauleg(sp%knots(i-1), sp%knots(i),  &
                &      sp%gausx(1:ngauss,i), sp%gausw(1:ngauss,i), ngauss)
        END DO
     END IF
 !
 !  Compute integral of each splines
     IF( ASSOCIATED(sp%intspl) ) DEALLOCATE(sp%intspl)
     ALLOCATE(sp%intspl(0:sp%dim-1))
     CALL calc_integ(sp, sp%intspl)
 !
   END SUBROUTINE set_spline1d
 !===========================================================================
   SUBROUTINE init_dft(sp, kmin, kmax, couplings)
 !
 !   Initialize DFT
 !
     TYPE(spline1d)      :: sp
     INTEGER, INTENT(in) :: kmin, kmax
     INTEGER, INTENT(in), OPTIONAL :: couplings(:)
 !
     INTEGER :: n, p, dk, k, j, nc
     DOUBLE COMPLEX :: zc
 !
     n = sp%nints
     p = sp%order-1
     dk = kmax-kmin+1
 !
 ! Check that -N/2 .LE. Kmin .LE. Kmax .LT. N/2
 !
     IF(kmin.GT.kmax .OR. kmin.LT.-n/2 .OR. kmax.GE.n/2) THEN
        WRITE(*,'(a,2i6,a)') 'kmin, kmax =', kmin, kmax, ' erroneous!'
        STOP
     END IF
 !
 ! The Fourier window
 !
     sp%dft%n = n
     sp%dft%kmin = kmin
     sp%dft%kmax = kmax
     sp%dft%dk = dk
     sp%dft%dx = sp%knots(1) - sp%knots(0)
 !
 ! Precalculate the DFT coefs  exp( i(2*pi/N)jk ), k=kmin,kmax, j=0,p
 !
     IF( ASSOCIATED(sp%dft%coefs) ) DEALLOCATE(sp%dft%coefs)
     ALLOCATE(sp%dft%coefs(kmin:kmax,0:p))
     zc = EXP( CMPLX(0.0d0, 2.0d0*pi/REAL(n,8),8) )
     sp%dft%coefs(:,0) = 1.0d0       ! j=0
     sp%dft%coefs(kmin,1) = zc**kmin ! j=1 
     DO k=kmin+1,kmax
        sp%dft%coefs(k,1) = sp%dft%coefs(k-1,1)*zc
     END DO
     DO j=2,p
        sp%dft%coefs(:,j) = sp%dft%coefs(:,1)*sp%dft%coefs(:,j-1)
     END DO
 !
 !   Mode couplings: by default use the whole window
 !
     nc = dk
     IF( PRESENT(couplings)) nc = SIZE(couplings)
 !
     IF(ASSOCIATED(sp%dft%mode_couplings)) DEALLOCATE(sp%dft%mode_couplings)
     ALLOCATE(sp%dft%mode_couplings(nc))
 !
     IF(PRESENT(couplings)) THEN
        sp%dft%mode_couplings = couplings
     ELSE
        sp%dft%mode_couplings = (/ (k,k=kmin,kmax) /)
     END IF
   END SUBROUTINE init_dft
 !===========================================================================
   SUBROUTINE set_spline2d(p, ngauss, grid1, grid2, sp, period, nlppform,&
        &                  nlequid)
 !
 !  Setup a 2d spline
 !
     INTEGER, INTENT(in) :: p(2), ngauss(2)
     DOUBLE PRECISION, INTENT(in)  :: grid1(:)
     DOUBLE PRECISION, INTENT(in)  :: grid2(:)
     TYPE(spline2d), INTENT(out)   :: sp
     LOGICAL, OPTIONAL, INTENT(in) :: period(2)
     LOGICAL, OPTIONAL, INTENT(in) :: nlppform
     LOGICAL, OPTIONAL, INTENT(in) :: nlequid(2)
 !
     IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform, &
             &            nlequid(1))
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform, &
             &            nlequid(2))
     ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform)
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform)
     ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period=period(1), nlequid=nlequid(1))
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period=period(2), nlequid=nlequid(2))
     ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform, nlequid=nlequid(1))
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform, nlequid=nlequid(2))
     ELSE IF(PRESENT(period)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1))
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2))
     ELSE IF(PRESENT(nlppform)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform)
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform)
     ELSE IF(PRESENT(nlequid)) THEN
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlequid=nlequid(1))
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlequid=nlequid(2))
     ELSE
        CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1)
        CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2)
     END IF
   END SUBROUTINE set_spline2d
 !===========================================================================
   SUBROUTINE set_spline2d1d(p, ngauss, grid1, grid2, grid3, sp, period, &
        &                    nlppform, nlequid)
 !
 !  Setup a 2d1d spline (for axisymmetric problems)
 !
     INTEGER, INTENT(in) :: p(3), ngauss(3)
     DOUBLE PRECISION, INTENT(in)  :: grid1(:)
     DOUBLE PRECISION, INTENT(in)  :: grid2(:)
     DOUBLE PRECISION, INTENT(in)  :: grid3(:)
     TYPE(spline2d1d), INTENT(out) :: sp
     LOGICAL, OPTIONAL, INTENT(in) :: period(3)
     LOGICAL, OPTIONAL, INTENT(in) :: nlppform
     LOGICAL, OPTIONAL, INTENT(in) :: nlequid(3)
 !
     IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),&
             &            nlppform, nlequid(1:2))
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform, &
             &            nlequid(3))
     ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),&
             &            nlppform)
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform)
     ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period=period(1:2),&
             &            nlequid=nlequid(1:2))
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period=period(3), &
             &            nlequid=nlequid(3))
     ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform,&
             &            nlequid=nlequid(1:2))
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform,  &
             &            nlequid=nlequid(3))
     ELSE IF(PRESENT(period)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2))
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3))
     ELSE IF(PRESENT(nlppform)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform)
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform)
     ELSE IF(PRESENT(nlequid)) THEN
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlequid=nlequid(1:2))
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlequid=nlequid(3))
     ELSE
        CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12)
        CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3)
     END IF
   END SUBROUTINE set_spline2d1d
 !===========================================================================
   SUBROUTINE get_dim1(sp, dim, nx, nidbas)
 !
 !  Return spline dimension of 1d spline sp and optionally
 !  number of knot intervals nx and spline degree nidbas
 !
     TYPE(spline1d), INTENT(in) :: sp
     INTEGER, INTENT(out) :: dim
     INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas
     dim = sp%dim
     IF( PRESENT(nx) ) nx = sp%nints
     IF( PRESENT(nidbas) ) nidbas = sp%order - 1
   END SUBROUTINE get_dim1
 !===========================================================================
   SUBROUTINE get_dim2(sp, dim, nx, nidbas)
 !
 !  Return spline dimension of 2d spline sp and optionally
 !  number of knot intervals nx and spline degree nidbas
 !
     TYPE(spline2d), INTENT(in) :: sp
     INTEGER, INTENT(out) :: dim(2)
     INTEGER, OPTIONAL, INTENT(out) :: nx(2), nidbas(2)
 !
     dim(1) = sp%sp1%dim
     IF( PRESENT(nx) ) nx(1) = sp%sp1%nints
     IF( PRESENT(nidbas) ) nidbas(1) = sp%sp1%order - 1
 !
     dim(2) = sp%sp2%dim
     IF( PRESENT(nx) ) nx(2) = sp%sp2%nints
     IF( PRESENT(nidbas) ) nidbas(2) = sp%sp2%order - 1
   END SUBROUTINE get_dim2
 !===========================================================================
   SUBROUTINE get_gauss(sp, n, i, x, w)
 !
 !   Get Gauss points and weights from spline sp
 !
     TYPE(spline1d), INTENT(in) :: sp
     INTEGER, INTENT(out) :: n
     INTEGER, INTENT(in), OPTIONAL :: i
     DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(out) :: x, w
 !
     n = SIZE(sp%gausx, 1)
     IF( PRESENT(i) ) THEN 
        x(:) = sp%gausx(:,i)
        w(:) = sp%gausw(:,i)
     END IF
   END SUBROUTINE get_gauss
 !===========================================================================
   SUBROUTINE def_basfun0(xp, sp, fun, left)
 !
 !  Define the basis function and its derivatives at x
 !  fun(i,j) = (j-1)th derivative of ith basis function.
 !
     DOUBLE PRECISION, INTENT(in) :: xp
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(out) :: fun(:,:)
     INTEGER, OPTIONAL, INTENT(out) :: left
     DOUBLE PRECISION :: x
     INTEGER :: p, n, kleft
     INTEGER :: ierr, j, k
 !
     CALL locintv(sp, xp, kleft)
     CALL basfun(xp, sp, fun, kleft+1)
     IF(PRESENT(left)) THEN
        left = kleft
     END IF
   END SUBROUTINE def_basfun0
 !===========================================================================
   SUBROUTINE basfun0(xp, sp, f, left)
 !
 !  Define the basis function and its derivatives at x, in interval
 !  [left,left+1], using PPFORM defined by sp%val0., left=1,..,nints
 !    f(i,j) = jth derivative of ith basis function.
 !
     DOUBLE PRECISION, INTENT(in)   :: xp
     DOUBLE PRECISION, INTENT(out)  :: f(:,0:)
     INTEGER, INTENT(in)            :: left   ! =1,2,...,nints
     TYPE(spline1d) :: sp
 !
     INTEGER          :: p, n, jdermx, i, jder
     DOUBLE PRECISION :: x, h
 !
     p = sp%order - 1
     n = sp%nints
     jdermx = SIZE(f,2)-1
 !
     h = xp-sp%knots(left-1)  ! knots are numbered from 0
 !
     IF(sp%period .AND. sp%nlequid) THEN
        DO jder=0,jdermx   ! Derivative jder
           CALL my_ppval(p, h, sp%valc, jder, f(:,jder))
        END DO
     ELSE
        DO jder=0,jdermx   ! Derivative jder
           CALL my_ppval(p, h, sp%val0(:,:,left), jder, f(:,jder))
        END DO
     END IF
   CONTAINS
     SUBROUTINE my_ppval(p, x, ppform, jder, f)
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:)
       INTEGER, INTENT(in) :: jder
       DOUBLE PRECISION, INTENT(out) :: f(:)
       DOUBLE PRECISION :: fact
       INTEGER :: j
       SELECT CASE (jder)
       CASE(0)            ! function value
          SELECT CASE(p)
          CASE(1)
             f(1) = ppform(1,1) + x*ppform(2,1)
          CASE(2)
             f(1) = ppform(1,1) + x*(ppform(2,1)+x*ppform(3,1))
             f(2) = ppform(1,2) + x*(ppform(2,2)+x*ppform(3,2))
          CASE(3)
             f(1) = ppform(1,1) + x*(ppform(2,1)+x*(ppform(3,1)+x*ppform(4,1))) 
             f(2) = ppform(1,2) + x*(ppform(2,2)+x*(ppform(3,2)+x*ppform(4,2)))
             f(3) = ppform(1,3) + x*(ppform(2,3)+x*(ppform(3,3)+x*ppform(4,3)))
         CASE(4:)
             f(1:p) = ppform(p+1,1:p)
             DO j=p,1,-1
                f(1:p) = f(1:p)*x + ppform(j,1:p)
             END DO
          END SELECT
          f(p+1) = 1.0d0 - SUM(f(1:p))
       CASE(1)            ! 1st derivative
          SELECT CASE(p)
          CASE(1)
             f(1) = ppform(2,1)
          CASE(2)
             f(1) = ppform(2,1) + x*2.d0*ppform(3,1)
             f(2) = ppform(2,2) + x*2.d0*ppform(3,2)
          CASE(3)
             f(1) = ppform(2,1) + x*(2.d0*ppform(3,1)+x*3.0d0*ppform(4,1))
             f(2) = ppform(2,2) + x*(2.d0*ppform(3,2)+x*3.0d0*ppform(4,2))
             f(3) = ppform(2,3) + x*(2.d0*ppform(3,3)+x*3.0d0*ppform(4,3))
          CASE(4:)
             f(1:p) = p*ppform(p+1,1:p)
             DO j=p-1,1,-1
                f(1:p) = f(1:p)*x + j*ppform(j+1,1:p)
             END DO
          END SELECT
          f(p+1) = -SUM(f(1:p))
       CASE default       ! 2nd and higher derivatives
          fact = p-jder
          f(1:p) = ppform(p+1,1:p)
          DO j=p,jder+1,-1
             f(1:p) = f(1:p)/fact*j*x + ppform(j,1:p)
             fact = fact-1.0d0
          END DO
          DO j=2,jder
             f(1:p) = f(1:p)*j
          END DO
          f(p+1) = -SUM(f(1:p))
       END SELECT
     END SUBROUTINE my_ppval
   END SUBROUTINE basfun0
 !===========================================================================
   SUBROUTINE basfun1(xp, sp, f, left)
 !
 !  Define the basis function and its derivatives at x, in interval i=1,2,
 !  using PPFORM defined by sp%val0.
 !    f(i,j,p) = jth derivative of ith basis function at coordinate xp
 !
     DOUBLE precision, INTENT(in)   :: xp(:)
     DOUBLE PRECISION, INTENT(out)  :: f(0:,0:,:)
     INTEGER, INTENT(in)            :: left(:)  ! =1,2,...,nints
     TYPE(spline1d) :: sp
 !
     INTEGER          :: p, n, kleft, i, j, jder, ierr
     INTEGER :: npt, jdermx
     DOUBLE PRECISION :: h(SIZE(xp)), temp(SIZE(xp))
     DOUBLE PRECISION :: ppform(SIZE(xp),sp%order)
 !
     p = sp%order - 1
     n = sp%nints
     npt = SIZE(xp)
     jdermx = SIZE(f,2)-1
 !
     h = xp - sp%knots(left-1)  ! knots are numbered from 0
 !
     IF( sp%period .AND. sp%nlequid) THEN
        DO jder=0,jdermx
           CALL my_ppval_same(p, h, sp%valc, jder, f(:,jder,1:npt))
        END DO
     ELSE
        DO i=0,p                  ! Spline i
           DO j=1,npt
              ppform(j,:) = sp%val0(:,i+1,left(j))
           END DO
           DO jder=0,jdermx       ! Derivative jder
              CALL my_ppval(p, h, ppform, jder, temp)
              f(i,jder,1:npt) = temp
           END DO
        END DO
     END IF
   CONTAINS
 !+++
     SUBROUTINE my_ppval(p, x, ppform, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !   for many points x(:)
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x(:)
       DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
       INTEGER, INTENT(in) :: jder
       DOUBLE PRECISION, INTENT(out) :: f(:)
       DOUBLE PRECISION :: fact
       INTEGER :: j
       SELECT CASE (jder)
       CASE(0)            ! function value
          SELECT CASE(p)
          CASE(1)
             f(:) = ppform(:,1) + x(:)*ppform(:,2)
          CASE(2)
             f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3))
          CASE(3)
             f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4)))
          CASE(4:)
             f(:) = ppform(:,p+1)
             DO j=p,1,-1
                f(:) = ppform(:,j) + f(:)*x(:)
             END DO
          END SELECT
       CASE(1)            ! 1st derivative
          SELECT CASE(p)
          CASE(1)
             f(:) = ppform(:,2)
          CASE(2)
             f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3)
          CASE(3)
             f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4))
          CASE(4:)
             f(:) = p*ppform(:,p+1)
             DO j=p-1,1,-1
                f(:) = f(:)*x(:) + j*ppform(:,j+1)
             END DO
          END SELECT
       CASE default       ! 2nd and higher derivatives
          f(:) = ppform(:,p+1)
          fact = p-jder
          DO j=p,jder+1,-1
             f(:) = f(:)/fact*j*x(:) + ppform(:,j)
             fact = fact-1.0d0
          END DO
          DO j=2,jder
             f(:) = f(:)*j
          END DO
       END SELECT
     END SUBROUTINE my_ppval
 !+++
     SUBROUTINE my_ppval_same(p, x, ppform, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !   for many points x(:), same ppform (translationnal invariant spline)
     INTEGER, INTENT(in) :: p
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
     INTEGER, INTENT(in) :: jder
     DOUBLE PRECISION, INTENT(out) :: f(:,:)
     DOUBLE PRECISION :: fact
     INTEGER :: j,k
     SELECT CASE (jder)
 !
 ! function value
     CASE(0)            
        SELECT CASE(p)
        CASE(1)
           f(1,:) = ppform(1,1) + x(:)*ppform(2,1)
        CASE(2)
           f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*ppform(3,1))
           f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*ppform(3,2))
        CASE(3)
           f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*(ppform(3,1)+x(:)*ppform(4,1)))
           f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*(ppform(3,2)+x(:)*ppform(4,2)))
           f(3,:) = ppform(1,3) + x(:)*(ppform(2,3)+x(:)*(ppform(3,3)+x(:)*ppform(4,3)))
        CASE(4:)
           DO k=1,p
              f(k,:) = ppform(p+1,k)
              DO j=p,1,-1
                 f(k,:) = ppform(j,k) + f(k,:)*x(:)
              END DO
           END DO
        END SELECT
        f(p+1,:) = 1.0d0 - SUM(f(1:p,:),DIM=1)
 !
 ! 1st derivative
     CASE(1)         
        SELECT CASE(p)
        CASE(1)
           f(1,:) = ppform(2,1)
        CASE(2)
           f(1,:) = ppform(2,1) + x(:)*2.d0*ppform(3,1)
           f(2,:) = ppform(2,2) + x(:)*2.d0*ppform(3,2)
        CASE(3)
           f(1,:) = ppform(2,1) + x(:)*(2.d0*ppform(3,1)+x(:)*3.0d0*ppform(4,1))
           f(2,:) = ppform(2,2) + x(:)*(2.d0*ppform(3,2)+x(:)*3.0d0*ppform(4,2))
           f(3,:) = ppform(2,3) + x(:)*(2.d0*ppform(3,3)+x(:)*3.0d0*ppform(4,3))
        CASE(4:)
           DO k=1,p
              f(k,:) = p*ppform(p+1,k)
              DO j=p-1,1,-1
                 f(k,:) = f(k,:)*x(:) + j*ppform(j+1,k)
              END DO
           END DO
        END SELECT
        f(p+1,:) = -SUM(f(1:p,:),DIM=1)
 !
 ! 2nd and higher derivatives
        CASE(2:)        
           DO k=1,p
              f(k,:) = ppform(p+1,k)
              fact = p-jder
              DO j=p,jder+1,-1
                 f(k,:) = f(k,:)/fact*j*x(:) + ppform(j,k)
                 fact = fact-1.0d0
              END DO
              DO j=2,jder
                 f(k,:) = f(k,:)*j
              END DO
           END DO
           f(p+1,:) = -SUM(f(1:p,:),DIM=1)
        END SELECT
      END SUBROUTINE my_ppval_same
 !+++
   END SUBROUTINE basfun1
 !===========================================================================
   SUBROUTINE def_basfun1(xp, sp, fun, left)
 !
 !  Define the basis function and its derivatives at x
 !  fun(i,j) = (j-1)th derivative of ith basis function.
 !
     DOUBLE PRECISION, INTENT(in) :: xp(:)
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(out) :: fun(:,:,:)
     INTEGER, OPTIONAL, INTENT(out) :: left(:)
     DOUBLE PRECISION :: x(SIZE(xp))
     INTEGER :: kleft(SIZE(xp))
     INTEGER :: p, n
     INTEGER :: ierr, j, k
 !
     CALL locintv(sp, xp, kleft)
     CALL basfun(xp, sp, fun, kleft+1)
     IF(PRESENT(left)) THEN
        left = kleft
     END IF
   END SUBROUTINE def_basfun1
 !===========================================================================
   SUBROUTINE ft_basfun0(xp, sp, ft_f, left)
 !
 !  DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes)
 !                                     j=0, p-1 (order of derivative))
 !
     DOUBLE PRECISION, INTENT(in)   :: xp
     DOUBLE COMPLEX, INTENT(out)    :: ft_f(:,:)
     INTEGER, INTENT(in)            :: left
     TYPE(spline1d)                 :: sp
     DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2))
 !
 !  Construct all splines on interval [left,left+1] at coordinate  xp
     CALL basfun(xp, sp, f, left)
 !
 !  DFT of splines
     ft_f = MATMUL(sp%dft%coefs, f)    
   END SUBROUTINE ft_basfun0
 !===========================================================================
   SUBROUTINE ft_basfun1(xp, sp, ft_f, left)
 !
 !  DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes)
 !                                     j=0, p-1 (order of derivative))
 !                          at xp(i)
 !
     DOUBLE PRECISION, INTENT(in)   :: xp(:)
     DOUBLE COMPLEX, INTENT(out)    :: ft_f(:,:,:)
     INTEGER, INTENT(in)            :: left(:)
     TYPE(spline1d)                 :: sp
 !
     INTEGER :: i, n3
     DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2),SIZE(ft_f,3))
 !
 !  Construct all splines on interval [left,left+1] at coordinate  xp
     CALL basfun(xp, sp, f, left)
 !
 !  DFT of splines
     n3 = SIZE(xp)
     DO i=1,n3
        ft_f(:,:,i) = MATMUL(sp%dft%coefs, f(:,:,i))
     END DO
   END SUBROUTINE ft_basfun1
 !===========================================================================
   SUBROUTINE basfun_recur(xp, sp, fun, left)
 !
 !  Define the basis function and its derivatives at x, in interval i=1,2,
 !  using recurrence construct in function BVALUE
 !  fun(i,j) = (j-1)th derivative of ith basis function.
 !
     DOUBLE PRECISION, INTENT(in) :: xp
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(out) :: fun(:,:)
     INTEGER, INTENT(in) :: left
     DOUBLE PRECISION :: bcoef(1)=1.0d0, bvalue, x
     INTEGER :: p, n, kleft
     INTEGER :: ierr, j, k
 !
     p = sp%order - 1
     n = sp%nints
     fun = 0.0d0
 !
     IF( sp%period ) THEN   !  ** Applly periodicity **
        x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod)
     ELSE
        x = xp
     END IF
 !
     kleft = left-1
     DO j=kleft-p, kleft
        DO k=0, SIZE(fun,2)-1
           fun(j-kleft+p+1, k+1) = bvalue(sp%knots(j), bcoef, 1, p+1, x, k) 
        END DO
     END DO
   END SUBROUTINE basfun_recur
 !===========================================================================
   SUBROUTINE gauleg(x1,x2,x,w,n)
 !
 !   Compute Gauss-Legendre abscissas and weights in interval [x1, x2]
 !
     INTEGER, INTENT(in) :: n
     DOUBLE PRECISION, INTENT(in) :: x1,x2
     DOUBLE PRECISION, INTENT(out) :: x(n),w(n)
     DOUBLE PRECISION :: EPS
     INTEGER i,j,m
     DOUBLE PRECISION p1,p2,p3,pp,xl,xm,z,z1
 !
     eps=EPSILON(eps)
     m=(n+1)/2
     xm=0.5d0*(x2+x1)
     xl=0.5d0*(x2-x1)
     DO i=1,m
        z=COS(3.141592654d0*(i-.25d0)/(n+.5d0))
        DO
           p1=1.d0; p2=0.d0
           DO j=1,n
              p3=p2; p2=p1
              p1=((2.d0*j-1.d0)*z*p2-(j-1.d0)*p3)/j
           END DO
           pp=n*(z*p1-p2)/(z*z-1.d0)
           z1=z
           z=z1-p1/pp
           IF( ABS(z-z1) .LE. EPS ) EXIT
        END DO
        x(i)=xm-xl*z
        x(n+1-i)=xm+xl*z
        w(i)=2.d0*xl/((1.d0-z*z)*pp*pp)
        w(n+1-i)=w(i)
     END DO
   END SUBROUTINE gauleg
 !===========================================================================
   SUBROUTINE gridval1dz(sp, xp, f, jder, c, ppformz)
 !
 !  Compute values or jder-th dervivative of f(x) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients are given.
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f
     INTEGER, INTENT(in) :: jder
     DOUBLE COMPLEX, DIMENSION(:), OPTIONAL, INTENT(in) :: c
     DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL :: ppformz
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), h, fact
     INTEGER :: order, nints, i, j, nidbas
     INTEGER :: leftx(SIZE(xp))
 !
     order = sp%order
     nints = sp%nints
     nidbas = order-1
 !
 !   Compute PPFORM/BCOEFS if spline coefs are passed
 !
     IF (PRESENT(c)) THEN
        IF (sp%nlppform) THEN
           IF( PRESENT(ppformz) ) THEN
              CALL topp0z(sp, c, ppformz)
           ELSE
              IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
              ALLOCATE(sp%ppformz(order,nints))
              CALL topp0z(sp, c, sp%ppformz)
           END IF
        ELSE
           IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
           ALLOCATE(sp%bcoefsc(SIZE(c)))
           sp%bcoefsc = c
        END IF
     END IF
 !
 !  Applly periodicity if required
 !
     IF( sp%period ) THEN
        x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod)
     ELSE
        x = xp
     END IF
 !
 !  Locate the intervals containing x
 !
     CALL locintv(sp, x, leftx)
 !
 !  Compute function/derivatives
 !
     IF( sp%nlppform ) THEN   ! using PP form
        DO i=1,SIZE(x)
           IF( PRESENT(ppformz) ) THEN
              CALL ppval(sp, x(i), ppformz(:,leftx(i)+1), leftx(i), jder, f(i))
           ELSE
              CALL ppval(sp, x(i), sp%ppformz(:,leftx(i)+1), leftx(i), jder, f(i))
           END IF
        END DO
     ELSE                     ! using spline expansion
        ALLOCATE(fun(0:nidbas,0:jder))
        f = 0.0d0
        DO i=1,SIZE(x)
           CALL basfun(x(i), sp, fun, leftx(i)+1)
           DO j=0,nidbas
              f(i) = f(i) + sp%bcoefsc(leftx(i)+j+1)*fun(j,jder)
           END DO
        END DO
        DEALLOCATE(fun)
     END IF
 !
   END SUBROUTINE gridval1dz
 !===========================================================================
   SUBROUTINE gridval1d(sp, xp, f, jder, c, ppform)
 !
 !  Compute values or jder-th dervivative of f(x) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients are given.
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f
     INTEGER, INTENT(in) :: jder
     DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(in) :: c
     DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL :: ppform
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), h, fact
     INTEGER :: order, nints, i, j, nidbas
     INTEGER :: leftx(SIZE(xp))
 !
     order = sp%order
     nints = sp%nints
     nidbas = order-1
 !
 !   Compute PPFORM/BCOEFS if spline coefs are passed
 !
     IF (PRESENT(c)) THEN
        IF (sp%nlppform) THEN
           IF( PRESENT(ppform) ) THEN
              CALL topp0(sp, c, ppform)
           ELSE
              IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
              ALLOCATE(sp%ppform(order,nints))
              CALL topp0(sp, c, sp%ppform)
           END IF
        ELSE
           IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
           ALLOCATE(sp%bcoefs(SIZE(c)))
           sp%bcoefs = c
        END IF
     END IF
 !
 !  Applly periodicity if required
 !
     IF( sp%period ) THEN
        x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod)
     ELSE
        x = xp
     END IF
 !
 !  Locate the intervals containing x
 !
     CALL locintv(sp, x, leftx)
 !
 !  Compute function/derivatives
 !
     IF( sp%nlppform ) THEN   ! using PP form
        DO i=1,SIZE(x)
           IF( PRESENT(ppform) ) THEN
              CALL ppval(sp, x(i), ppform(:,leftx(i)+1), leftx(i), jder, f(i))
           ELSE
              CALL ppval(sp, x(i), sp%ppform(:,leftx(i)+1), leftx(i), jder, f(i))
           END IF
        END DO
     ELSE                     ! using spline expansion
        ALLOCATE(fun(0:nidbas,0:jder))
        f = 0.0d0
        DO i=1,SIZE(x)
           CALL basfun(x(i), sp, fun, leftx(i)+1)
           DO j=0,nidbas
              f(i) = f(i) + sp%bcoefs(leftx(i)+j+1)*fun(j,jder)
           END DO
        END DO
        DEALLOCATE(fun)
     END IF
 !
   END SUBROUTINE gridval1d
 !===========================================================================
   SUBROUTINE def_knots(p, xg, knots, period, nlskip)
 !
 !   Define spline knots for interpolating at sites given by xg
 !
     INTEGER, INTENT(in) :: p
     DOUBLE PRECISION, INTENT(in) :: xg(0:)
     DOUBLE PRECISION, POINTER :: knots(:)
     LOGICAL, OPTIONAL, INTENT(in) :: period, nlskip
     LOGICAL :: kperiod, mlskip
     INTEGER :: npt, dim, nx, i, ii
 !
     kperiod=.FALSE.
     mlskip = .TRUE.
     IF( PRESENT(period) ) kperiod=period
     IF( PRESENT(nlskip) ) mlskip = nlskip
 !
 !    Periodic splines
 !
     IF( kperiod ) THEN
        nx = SIZE(xg) -1
        IF( ASSOCIATED(knots) ) DEALLOCATE(knots)
        ALLOCATE(knots(0:nx))
        IF( MODULO(p,2) .NE. 0 ) THEN  ! Odd degree
           knots(0:nx) = xg(0:nx)
        ELSE                           ! Even degree
           DO i=1,nx
              knots(i) = 0.5d0*(xg(i-1)+xg(i))
           END DO
           knots(0) = knots(nx) - (xg(nx)-xg(0))
        END IF
        RETURN
     END IF
 !
 !    Non-periodic splines
 !
     npt = SIZE(xg)
     dim = npt
     IF( .NOT. mlskip ) THEN
        dim = dim + 2*(p/2) ! Add BC on derivatives
     END IF
     nx = dim-p
     IF( ASSOCIATED(knots) ) DEALLOCATE(knots)
     ALLOCATE(knots(0:nx))
 !
     knots(0) = xg(0)
     knots(nx) = xg(npt-1)
 !
     IF( MODULO(p,2) .EQ. 0 ) THEN
        ii = 0
        IF( mlskip ) ii = p/2     ! skip first p/2 intervals
        DO i=1,nx-1
           ii = ii+1
           knots(i) = (xg(ii)+xg(ii-1))/2
        END DO
     ELSE
        ii = 0
        IF( mlskip ) ii = (p-1)/2  ! skip (p-1)/2 points after the first point ii=0
        DO i=1,nx-1
           ii = ii+1
           knots(i) = xg(ii)
        END DO
     END IF
 !
   END SUBROUTINE def_knots
 !===========================================================================
   SUBROUTINE allsplines(sp, xpt, splines)
 !
 !   Return all splines defined on points xpt
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: xpt(:)
     DOUBLE PRECISION, POINTER :: splines(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     INTEGER :: i, n, left, dim, p
 !
     p = sp%order - 1
     dim = sp%dim
     n = SIZE(xpt)
 !
     IF( ASSOCIATED(splines) ) DEALLOCATE(splines)
     ALLOCATE(splines(n,dim), fun(p+1,1))
     splines = 0.0d0
     DO i=1,n
        CALL locintv(sp, xpt(i), left)
        CALL basfun(xpt(i), sp, fun, left+1)
        splines(i,left+1:left+p+1) = fun(1:p+1,1)
     END DO
     DEALLOCATE(fun)
   END SUBROUTINE allsplines
 !===========================================================================
   SUBROUTINE set_splcoef1d(p, x, sp, period, ibc)
 !
 !   Setup 1d interpolation matrix for spline of degree p
 !
     INTEGER, INTENT(in) :: p
     DOUBLE PRECISION, INTENT(in) :: x(:)
     TYPE(spline1d), INTENT(out) :: sp
     LOGICAL, OPTIONAL, INTENT(in) :: period
     INTEGER, OPTIONAL :: ibc(:,:)
 !
     LOGICAL :: kperiod
 !
     kperiod = .FALSE.
     IF( PRESENT(period) )  kperiod = period
 !
     IF( kperiod ) THEN
        CALL splcoefp_setup(p, x, sp)
     ELSE
        IF( PRESENT(ibc) ) THEN 
           CALL splcoef_setup(p, x, sp, ibc)
        ELSE
           CALL splcoef_setup(p, x, sp)
        END IF
     END IF
   END SUBROUTINE set_splcoef1d
 !===========================================================================
   SUBROUTINE set_splcoef2d(p, x1, x2, sp, period, ibc1, ibc2)
 !
 !   Setup 2d interpolation matrix for spline of degree p
 !
     INTEGER, INTENT(in) :: p(2)
     DOUBLE PRECISION, INTENT(in) :: x1(:), x2(:)
     TYPE(spline2d), INTENT(out) :: sp
     LOGICAL, OPTIONAL, INTENT(in) :: period(2)
     INTEGER, OPTIONAL :: ibc1(:,:),ibc2(:,:) 
 !
     LOGICAL :: kperiod(2)
 !
     kperiod = .FALSE.
     IF( PRESENT(period) )  kperiod = period
 !
 !  Direction 1
     IF( kperiod(1) ) THEN
        CALL splcoefp_setup(p(1), x1, sp%sp1)
     ELSE
        IF( PRESENT(ibc1) ) THEN 
           CALL splcoef_setup(p(1), x1, sp%sp1, ibc1)
        ELSE
           CALL splcoef_setup(p(1), x1, sp%sp1)
        END IF
     END IF
 !
 !  Direction 2
     IF( kperiod(2) ) THEN
        CALL splcoefp_setup(p(2), x2, sp%sp2)
     ELSE
        IF( PRESENT(ibc2) ) THEN 
           CALL splcoef_setup(p(2), x2, sp%sp2, ibc2)
        ELSE
           CALL splcoef_setup(p(2), x2, sp%sp2)
        END IF
     END IF
   END SUBROUTINE set_splcoef2d
 !===========================================================================
   SUBROUTINE get_splcoef1(sp, f, c, fbc)
 !
 !   Compute the spline coefficients c from grid values f
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: f(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:)
 !
     IF( sp%period ) THEN
        CALL splcoefp1(sp, f, c)
     ELSE
        IF( PRESENT(fbc) ) THEN
           CALL splcoef1(sp, f, c, fbc)
        ELSE
           CALL splcoef1(sp, f, c)
        END IF
     END IF
   END SUBROUTINE get_splcoef1
 !===========================================================================
   SUBROUTINE get_splcoef2d(sp, f, c, fbc1, fbc2)
 !
 !   Compute the spline coefficients c from 2d grid values f
 !
     TYPE(spline2d) :: sp
     DOUBLE PRECISION, INTENT(in) :: f(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:,:)
     DOUBLE PRECISION :: ctr(SIZE(c,2), SIZE(f,1))
     DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc1(:,:,:), fbc2(:,:,:)
 
     DOUBLE PRECISION, DIMENSION(:, :)   , ALLOCATABLE :: c_fbc1_left, c_fbc1_right
     DOUBLE PRECISION, DIMENSION(:, :, :), ALLOCATABLE :: c_fbc1_all
 !
 !  Along direction 2
 !
     IF( PRESENT(fbc2) ) THEN
        CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr, fbc2)
     ELSE
        CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr)
     END IF
 !
 !  Along direction 1
 !
     IF( PRESENT(fbc1) ) THEN
        ALLOCATE( c_fbc1_left(SIZE(c, 2), SIZE(fbc1, 2)))
        ALLOCATE(c_fbc1_right(SIZE(c, 2), SIZE(fbc1, 2)))
        ALLOCATE(c_fbc1_all(2, SIZE(fbc1, 2), SIZE(c, 2)))
 
        CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(1, :, :)), c_fbc1_left )
        CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(2, :, :)), c_fbc1_right)
 
        c_fbc1_all(1, :, :) = TRANSPOSE(c_fbc1_left ) 
        c_fbc1_all(2, :, :) = TRANSPOSE(c_fbc1_right) 
 
        CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c, c_fbc1_all)
 
        DEALLOCATE(c_fbc1_left, c_fbc1_right, c_fbc1_all)
     ELSE
        CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c)
     END IF
 !
   END SUBROUTINE get_splcoef2d
 !===========================================================================
   SUBROUTINE get_splcoef2dz(sp, f, c)
 !
 !   Compute the spline coefficients c from 2d grid values f
 !
     TYPE(spline2d) :: sp
     DOUBLE COMPLEX, INTENT(in) :: f(:,:)
     DOUBLE COMPLEX, INTENT(out) :: c(:,:)
     DOUBLE PRECISION, DIMENSION(SIZE(c,1), SIZE(c,2),2) :: pc
 !
     CALL get_splcoef2d(sp, REAL(f), pc(:,:,1))
     CALL get_splcoef2d(sp, AIMAG(f), pc(:,:,2))
     c(:,:) = CMPLX(pc(:,:,1),pc(:,:,2))
   END SUBROUTINE get_splcoef2dz
 !===========================================================================
   SUBROUTINE get_splcoef1z(sp, f, c, fbc)
 !
 !   Compute the spline coefficients c from grid values f
 !
     TYPE(spline1d) :: sp
     DOUBLE COMPLEX, INTENT(in) :: f(:)
     DOUBLE COMPLEX, INTENT(out) :: c(:)
     DOUBLE COMPLEX, INTENT(in), OPTIONAL :: fbc(:,:)
     DOUBLE PRECISION :: pf(SIZE(f),2), pc(SIZE(c),2)
     DOUBLE PRECISION, ALLOCATABLE ::  pfbc(:,:,:)
 !
     pf(:,1) = REAL(f(:))
     pf(:,2) = AIMAG(f(:))
     IF(PRESENT(fbc)) THEN
        ALLOCATE(pfbc(SIZE(fbc,1),SIZE(fbc,2),2))
        pfbc(:,:,1) = REAL(fbc(:,:))
        pfbc(:,:,2) = AIMAG(fbc(:,:))
        CALL get_splcoefn(sp, pf, pc, pfbc)
        DEALLOCATE(pfbc)
     ELSE
        CALL get_splcoefn(sp, pf, pc)       
     END IF
     c(:) = CMPLX(pc(:,1), pc(:,2))
 !
   END SUBROUTINE get_splcoef1z
 !===========================================================================
   SUBROUTINE get_splcoefn(sp, f, c, fbc)
 !
 !   Compute the spline coefficients c from grid values f
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: f(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:,:)
     DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:)
 !
     IF( sp%period ) THEN
        CALL splcoefpn(sp, f, c)
     ELSE
        IF( PRESENT(fbc) ) THEN
           CALL splcoefn(sp, f, c, fbc)
        ELSE
           CALL splcoefn(sp, f, c)
        END IF
     END IF
   END SUBROUTINE get_splcoefn
 !===========================================================================
    SUBROUTINE splcoef_setup(p, x, sp, ibc)
 !
 !   Setup the interpolation matrix
 !   for spline of degree p
 !
     INTEGER, INTENT(in) :: p
     DOUBLE PRECISION, INTENT(in) :: x(:)
     TYPE(spline1d), INTENT(out) :: sp
     INTEGER, OPTIONAL :: ibc(:,:)
     DOUBLE PRECISION, POINTER  :: knots(:)=>NULL(), arow(:)=>NULL(), &
          &                        fun(:,:)=>NULL()
     INTEGER :: nx, dim, kl, ku, rank
     INTEGER :: i, left, ishift
     LOGICAL :: nlskip
 !
 !    Type of Boundary Conditions
     nlskip = .TRUE.
     ishift = 0
     IF( PRESENT(ibc) ) THEN
        nlskip = .FALSE.
        ishift = p/2
     END IF
 !
 !    Set up spline
     nx = SIZE(x) - 1   ! X is the interpolation sites
     CALL def_knots(p, x, knots, nlskip=nlskip)
     CALL set_spline(p, 0, knots, sp)
     sp%nsites = nx + 1 ! Store away the number of interpolation sites
     DEALLOCATE(knots)
 !
 !    Set up interpolation matrix
     dim = sp%dim
     kl = MAX(p-1,0)
     ku = MAX(p-1,0)
     rank = dim
     CALL init(kl, ku, rank, 0, sp%mat)
 !!$    WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank
 !
 !    COMPUTE matrix row by row
     ALLOCATE(arow(dim), fun(p+1,0:p))
     DO i=1,SIZE(x)
        arow = 0.0d0
        CALL locintv(sp, x(i), left)
        CALL basfun(x(i), sp, fun(:,0:0), left+1)
        arow(left+1:left+p+1) = fun(1:p+1,0)
        CALL putrow(sp%mat, i+ishift, arow)
 !!$       WRITE(*,'(i5,13f8.3)') i+ishift, arow
     END DO
 !
 !    Add BC if specified
     IF( PRESENT(ibc) ) THEN
        CALL locintv(sp, x(1), left)
        CALL basfun(x(1), sp, fun, left+1)      ! BC at the left side
        DO i=1,p/2
           arow = 0.0d0
           arow(left+1:left+p+1) = fun(1:p+1,ibc(1,i))
           CALL putrow(sp%mat, i, arow)
 !!$          WRITE(*,'(i5,13f8.3)') i, arow
        END DO
        CALL locintv(sp, x(SIZE(x)), left)
        CALL basfun(x(SIZE(x)), sp, fun, left+1) ! BC at the right side
        DO i=1,p/2
           arow = 0.0d0
           arow(left+1:left+p+1) = fun(1:p+1,ibc(2,i))
           CALL putrow(sp%mat, dim-i+1, arow)
 !!$          WRITE(*,'(i5,13f8.3)') dim-i+1, arow
        END DO
     END IF
     DEALLOCATE(arow, fun)
 !
 !    Factor the matrix
     CALL factor(sp%mat)
 !
   END SUBROUTINE splcoef_setup
 !===========================================================================
   SUBROUTINE splcoefp_setup(p, x, sp)
 !
 !   Set up the interpolation matrix
 !   for periodic case
 !
     USE matrix
     INTEGER, INTENT(in) :: p
     DOUBLE PRECISION, INTENT(in) :: x(:)
     TYPE(spline1d), INTENT(out) :: sp
 !
     TYPE(gemat) :: hmat
     DOUBLE PRECISION, POINTER :: knots(:)=>NULL(), arow(:)=>NULL(), &
          &                       fun(:,:)=>NULL()
 !!$    DOUBLE PRECISION, POINTER :: arr2d(:,:)=>null()
     INTEGER :: nx, kl, ku, rank, mr, nc
     INTEGER :: i, left, j, jj
     DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0
 !________________________________________________________________________________
 !
 !    Set up spline
 !
     nx = SIZE(x) - 1   ! X is the interpolation sites
     CALL def_knots(p, x, knots, period=.TRUE.)
     CALL set_spline(p, 0, knots, sp, period=.TRUE.)
     sp%nsites = nx + 1 ! Store away the number of interpolation sites
     DEALLOCATE(knots)
 !________________________________________________________________________________
 !
 !    Set up interpolation matrix sp%matp
 !
     kl = MAX(p/2,0)
     ku = kl
     rank = nx
 !!$    WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank
 !
     CALL init(kl, ku, rank, 0, sp%matp%mat)    ! matp%mat is a GB matrix
     ALLOCATE(sp%matp%matu(rank, kl+ku), sp%matp%matvt(kl+ku,rank))
 !
     sp%matp%matu = zero
     sp%matp%matvt = zero             !    kl = ku = 2
     DO j=1,kl                        !  [ 1 0 0 . . . . ]
        sp%matp%matu(j,j) = one       !  [ 0 1 0 . . . . ]
     END DO                           !  [ . 0 . . . . . ]
     DO j=1,ku                        !  [ . . . . . 0 . ]
        i=rank-ku+j                   !  [ . . . . 0 1 0 ]
        sp%matp%matu(i,kl+j) = one    !  [ . . . . 0 0 1 ] 
     END DO
 !________________________________________________________________________________
 !
 !    COMPUTE matrix row by row
 !
     ALLOCATE(arow(rank), fun(p+1,1))
     DO i=1,rank
        arow = zero
        CALL locintv(sp, x(i), left)
        CALL basfun(x(i), sp, fun, left+1)
        left = left-p/2
        DO j=0,p
           jj = MODULO(left+j, rank) + 1
           arow(jj) = fun(j+1,1)
        END DO
        CALL putrow(sp%matp%mat, i, arow)
        IF( i .LE. kl ) THEN 
           sp%matp%matvt(i,rank-kl+1:rank) = arow(rank-kl+1:rank)
        ELSE IF ( i .GE. rank-ku+1 ) THEN
           j = i-(rank-ku+1) + 1
           sp%matp%matvt(kl+j,1:ku) = arow(1:ku)
        END IF
 !!$       WRITE(8, '(i5, 12(1pe12.3))') left, x(i), fun
 !!$       WRITE(*,'(i5, 12(1pe12.3))') i, arow
     END DO
     DEALLOCATE(arow, fun)
 !
 !!$    PRINT*, 'Matrix U, V'
 !!$    DO i=1,rank
 !!$       WRITE(*, '(i5,12(1pe12.3))') i, sp%matp%matu(i,:), sp%matp%matvt(:,i)
 !!$    END DO
 !!$    ALLOCATE(arr2d(rank,rank))
 !!$    arr2d = MATMUL(sp%matp%matu, sp%matp%matvt)
 !!$    PRINT*, 'Product U*V^T'
 !!$    DO i=1,rank
 !!$       WRITE(*, '(i5,12(1pe12.3))') i, arr2d(i,:)
 !!$    END DO
 !!$    DEALLOCATE(arr2d)
 !________________________________________________________________________________
 !
 !     Factorisation
 !
 ! Factor A
     CALL factor(sp%matp%mat)
 !
 !  For constant and linear splines, A is diagnonal!
 !  Should skip the rest
 !
     IF( kl.EQ.0 .OR. ku.EQ.0 ) THEN 
        RETURN 
     END IF
 !
 ! U <-- A^(-1) * U
     CALL bsolve(sp%matp%mat, sp%matp%matu)
 !
 ! H <-- 1 + V^T * U
     mr = SIZE(sp%matp%matvt, 1)
     nc = SIZE(sp%matp%matvt, 2)
     CALL init(mr, 0, hmat)  ! hmat is initialized to 0!
     DO i=1,mr
        hmat%val(i,i) = one
     END DO
     CALL dgemm('N', 'N', mr, mr, nc, one, sp%matp%matvt, mr, &
          &      sp%matp%matu, nc, one, hmat%val, mr)
 !
 !!$    hmat%val = MATMUL(sp%matp%matvt, sp%matp%matu)
 !!$    DO i=1,kl+ku
 !!$       hmat%val(i,i) = 1.0d0 + hmat%val(i,i)
 !!$    END DO
 !
 ! V^T <-- H^(-1) V^T
     CALL factor(hmat)
     CALL bsolve(hmat, sp%matp%matvt)
     CALL destroy(hmat)
 !
   END SUBROUTINE splcoefp_setup
 !===========================================================================
   SUBROUTINE splcoef1(sp, f, c, fbc)
 !
 !   Compute the spline coefficients c from grid values f and BC fbc
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in)  :: f(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
     DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:)
     INTEGER :: p, dim, i, ishift
 !
     p = sp%order-1
     dim = sp%dim
 !
 !    BC at left and right boundary
     ishift = 0
     IF( PRESENT(fbc) ) THEN
        DO i=1,p/2
           c(i)       = fbc(1,i)     ! Left boundary
           c(dim-i+1) = fbc(2,i)     ! Right boundary
        END DO
        ishift = p/2
     END IF
 !
 !    Interior points
     DO i=1,sp%nsites
        c(i+ishift) = f(i)
     END DO
 !    WRITE(*,'(a/(13f8.3))') 'RHS', c
 !
 !    Solve for the interpolation coefs. using the factored sp%mat
     CALL bsolve(sp%mat, c)
 !
   END SUBROUTINE splcoef1
 !===========================================================================
   SUBROUTINE splcoefn(sp, f, c, fbc)
 !
 !   Compute the spline coefficients c from grid values f and BC fbc
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in)  :: f(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:,:)
     DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:)
     INTEGER :: p, dim, i, ishift
 !
     p = sp%order-1
     dim = sp%dim
 !
 !    BC at left and right boundary
     ishift = 0
     IF( PRESENT(fbc) ) THEN
        DO i=1,p/2
           c(i,:)       = fbc(1,i,:)     ! Left boundary
           c(dim-i+1,:) = fbc(2,i,:)     ! Right boundary
        END DO
        ishift = p/2
     END IF
 !
 !    Interior points   ! c(:,j) for j>SIZE(f,2) could be anything 
     DO i=1,sp%nsites   ! (periodicity in the 2nd dimension)!
        c(i+ishift,1:SIZE(f,2)) = f(i,:)
     END DO
 !!$    WRITE(*,'(a/(13f8.3))') 'RHS', c
 !
 !    Solve for the interpolation coefs. using the factored sp%mat
     CALL bsolve(sp%mat, c)
 !
   END SUBROUTINE splcoefn
 !===========================================================================
   SUBROUTINE splcoefp1(sp, f, c)
 !
 !   Compute the spline coefficient c from grid values f
 !   f(x) is periodic
 !
     USE matrix
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: f(:)
     DOUBLE PRECISION, INTENT(out) :: c(:)
 !
     DOUBLE PRECISION, POINTER  :: arow(:), brow(:)
     DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0
     INTEGER :: dim, p, rank, bandw
     INTEGER :: i, j
 !________________________________________________________________________________
 !
     p     = sp%order-1
     rank  = sp%nints
     bandw = SIZE(sp%matp%matvt,1)
 !
 !    Solve the interpolation system
 !
 !  Solve Ay = f
     ALLOCATE(arow(rank), brow(bandw))
 !
     arow(1:rank) = f(1:rank)
     CALL bsolve(sp%matp%mat, arow)
 !
 !  For constant and linear splines, A is diagnonal!
 !  Should skip the rest
 !
     IF( p.LE.1 ) GOTO 100
 !
 !
 !  t = V^T*y
     CALL dgemv('N', bandw, rank, one, sp%matp%matvt, bandw, arow, 1, zero, &
          &      brow, 1)
 !
 !  y = y - Ut
     CALL dgemv('N', rank, bandw, minus1, sp%matp%matu, rank, brow, 1, one, &
          &      arow, 1)
 !
 100 CONTINUE
 !
 !  Interpolation coefficients
     dim   = sp%dim
     DO i=1,dim
        j = MODULO(i-1-p/2, rank) + 1
        c(i) = arow(j)
     END DO
 !
     DEALLOCATE(arow,brow)
 !
   END SUBROUTINE splcoefp1
 !===========================================================================
   SUBROUTINE splcoefpn(sp, f, c)
 !
 !   Compute the spline coefficient c from grid values f
 !   f(x) is periodic
 !
     USE matrix
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: f(:,:)
     DOUBLE PRECISION, INTENT(out) :: c(:,:)
 !
     DOUBLE PRECISION, POINTER  :: arow(:,:), brow(:,:)
     DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0
     INTEGER :: p, dim, rank, bandw, nrhs
     INTEGER :: i, j, k
 !________________________________________________________________________________
 !
     p     = sp%order-1
     rank  = sp%nints
     bandw = SIZE(sp%matp%matvt,1)
     nrhs = SIZE(f,2)
 !
 !
 !    Solve the interpolation system
 !
 !  Solve Ay = f
     ALLOCATE(arow(rank,nrhs), brow(bandw,nrhs))
 !
     arow(1:rank,1:nrhs) = f(1:rank,1:nrhs)
     CALL bsolve(sp%matp%mat, arow)
 !
 !  For constant and linear splines, A is diagnonal!
 !  Should skip the rest
 !
     IF( p.LE.1 ) GOTO 100
 !
 !
 !  t = V^T*y
     CALL dgemm('N', 'N', bandw, nrhs, rank, one, sp%matp%matvt, bandw, arow, &
          &      rank, zero, brow, bandw)
 !
 !  y = y - Ut
     CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, sp%matp%matu, rank, brow, &
          &      bandw, one, arow, rank)
 !
 100 CONTINUE
 !
 !  Interpolation coefficients
     dim   = sp%dim
     DO k=1,nrhs
        DO i=1,dim
           j = MODULO(i-1-p/2, rank) + 1
           c(i,k) = arow(j,k)
        END DO
     END DO
 !
     DEALLOCATE(arow,brow)
 !
   END SUBROUTINE splcoefpn
 !
 !===========================================================================
   SUBROUTINE topp0(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(:)
     DOUBLE PRECISION, INTENT(out) :: ppform(0:,:)
     INTEGER :: p, nints, i, j, k
 !
     p = sp%order - 1
     nints = sp%nints
 !
     ppform = 0.0d0
     DO i=1,nints       ! on each knot interval
        DO j=1,p+1      ! all spline in interval i
           DO k=0,p     ! k_th derivatives
              ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1)
           END DO
        END DO
     END DO
 !
   END SUBROUTINE topp0
 !===========================================================================
   SUBROUTINE topp0z(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE COMPLEX, INTENT(in) :: c(:)
     DOUBLE COMPLEX, INTENT(out) :: ppform(0:,:)
     INTEGER :: p, nints, i, j, k
 !
     p = sp%order - 1
     nints = sp%nints
 !
     ppform = (0.0d0, 0.0d0)
     DO i=1,nints       ! on each knot interval
        DO j=1,p+1      ! all spline in interval i
           DO k=0,p     ! k_th derivatives
              ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1)
           END DO
        END DO
     END DO
 !
   END SUBROUTINE topp0z
 !===========================================================================
   SUBROUTINE topp1(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(:,:)
     DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:)
     INTEGER :: m
 !
     DO m=1,SIZE(c,2)
        CALL topp0(sp, c(:,m), ppform(m,:,:))
     END DO
 !
   END SUBROUTINE topp1
 !===========================================================================
   SUBROUTINE topp1z(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE COMPLEX, INTENT(in) :: c(:,:)
     DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:)
     INTEGER :: m
 !
     DO m=1,SIZE(c,2)
        CALL topp0z(sp, c(:,m), ppform(m,:,:))
     END DO
 !
   END SUBROUTINE topp1z
 !===========================================================================
   SUBROUTINE topp2(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(:,:,:)
     DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:)
     INTEGER :: m, mm
 !
     DO mm=1,SIZE(c,3)
        DO m=1,SIZE(c,2)
           CALL topp0(sp, c(:,m,mm), ppform(m,mm,:,:))
        END DO
     END DO
 !
   END SUBROUTINE topp2
 !===========================================================================
   SUBROUTINE topp2z(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE COMPLEX, INTENT(in) :: c(:,:,:)
     DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:)
     INTEGER :: m, mm
 !
     DO mm=1,SIZE(c,3)
        DO m=1,SIZE(c,2)
           CALL topp0z(sp, c(:,m,mm), ppform(m,mm,:,:))
        END DO
     END DO
 !
   END SUBROUTINE topp2z
 !===========================================================================
   SUBROUTINE topp3(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:,:,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:)
     DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:)
     INTEGER :: m, mm, mmm
 !
     DO mmm=1,SIZE(c,4)
        DO mm=1,SIZE(c,3)
           DO m=1,SIZE(c,2)
                  CALL topp0(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:))
           END DO
        END DO
     END DO
 !
   END SUBROUTINE topp3
 !===========================================================================
   SUBROUTINE topp3z(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:,:,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:)
     DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:)
     INTEGER :: m, mm, mmm
 !
     DO mmm=1,SIZE(c,4)
        DO mm=1,SIZE(c,3)
           DO m=1,SIZE(c,2)
                  CALL topp0z(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:))
           END DO
        END DO
     END DO
 !
   END SUBROUTINE topp3z
 !===========================================================================
   SUBROUTINE topp4(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:,:)
     DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:,:)
     INTEGER :: m, mm, mmm, mmmm
 !
     DO mmmm=1,SIZE(c,5)
        DO mmm=1,SIZE(c,4)
           DO mm=1,SIZE(c,3)
              DO m=1,SIZE(c,2)
                 CALL topp0(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:))
              END DO
           END DO
        END DO
     END DO
 !
   END SUBROUTINE topp4
 !===========================================================================
   SUBROUTINE topp4z(sp, c, ppform)
 !
 !   Compute PPFORM of a fuction defined by the spline SP
 !   and spline coefficients C(1:d,:,:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:,:)
     DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:,:)
     INTEGER :: m, mm, mmm, mmmm
 !
     DO mmmm=1,SIZE(c,5)
        DO mmm=1,SIZE(c,4)
           DO mm=1,SIZE(c,3)
              DO m=1,SIZE(c,2)
                 CALL topp0z(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:))
              END DO
           END DO
        END DO
     END DO
 !
   END SUBROUTINE topp4z
 !===========================================================================
   SUBROUTINE ppval0(sp, x, ppform, left, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x, ppform(:)
     INTEGER, INTENT(in) :: left, jder
     DOUBLE PRECISION, INTENT(out) :: f
     DOUBLE PRECISION :: h, fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h = x-sp%knots(left)
     f = 0.0d0
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f = f*h + ppform(j)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f = f*h + j*ppform(j+1)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f = f/fact*j*h + ppform(j)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f = f*j
        END DO
     END SELECT
   END SUBROUTINE ppval0
 !===========================================================================
   SUBROUTINE ppval0z(sp, x, ppform, left, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE COMPLEX, INTENT(in) :: ppform(:)
     INTEGER, INTENT(in) :: left, jder
     DOUBLE COMPLEX, INTENT(out) :: f
     DOUBLE PRECISION :: h, fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h = x-sp%knots(left)
     f = (0.0d0,0.0d0)
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f = f*h + ppform(j)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f = f*h + j*ppform(j+1)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f = f/fact*j*h + ppform(j)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f = f*j
        END DO
     END SELECT
   END SUBROUTINE ppval0z
 !===========================================================================
   SUBROUTINE ppval0z_n(sp, x, ppform, left, jder, f)
 !
 !   PPVAL0Z for many points x(:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x(:)
     DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
     INTEGER, INTENT(in) :: left(:), jder
     DOUBLE COMPLEX, INTENT(out) :: f(:)
     DOUBLE PRECISION :: h(SIZE(x)), fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h(:) = x(:)-sp%knots(left(:))
     f(:) = 0.0d0
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f(:) = f(:)*h(:) + ppform(:,j)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f(:) = f(:)*h(:) + j*ppform(:,j+1)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f(:) = f(:)/fact*j*h(:) + ppform(:,j)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f(:) = f(:)*j
        END DO
     END SELECT
   END SUBROUTINE ppval0z_n
 !===========================================================================
   SUBROUTINE ppval0_n(sp, x, ppform, left, jder, f)
 !
 !   PPVAL0 for many points x(:)
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x(:), ppform(:,:)
     INTEGER, INTENT(in) :: left(:), jder
     DOUBLE PRECISION, INTENT(out) :: f(:)
     DOUBLE PRECISION :: h(SIZE(x)), fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h(:) = x(:)-sp%knots(left(:))
     f(:) = 0.0d0
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f(:) = f(:)*h(:) + ppform(:,j)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f(:) = f(:)*h(:) + j*ppform(:,j+1)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f(:) = f(:)/fact*j*h(:) + ppform(:,j)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f(:) = f(:)*j
        END DO
     END SELECT
   END SUBROUTINE ppval0_n
 !===========================================================================
   SUBROUTINE ppval1(sp, x, ppform, left, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:)
     INTEGER, INTENT(in) :: left, jder
     DOUBLE PRECISION, INTENT(out) :: f(:)
     DOUBLE PRECISION :: h, fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h = x-sp%knots(left)
     f = 0.0d0
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f(:) = f(:)*h + ppform(j,:)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f(:) = f(:)*h + j*ppform(j+1,:)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f(:) = f(:)/fact*j*h + ppform(j,:)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f(:) = f(:)*j
        END DO
     END SELECT
   END SUBROUTINE ppval1
 !===========================================================================
   SUBROUTINE ppval1z(sp, x, ppform, left, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
     INTEGER, INTENT(in) :: left, jder
     DOUBLE COMPLEX, INTENT(out) :: f(:)
     DOUBLE PRECISION :: h, fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h = x-sp%knots(left)
     f = (0.0d0,0.0d0)
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f(:) = f(:)*h + ppform(j,:)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f(:) = f(:)*h + j*ppform(j+1,:)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f(:) = f(:)/fact*j*h + ppform(j,:)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f(:) = f(:)*j
        END DO
     END SELECT
   END SUBROUTINE ppval1z
 !===========================================================================
   SUBROUTINE ppval2(sp, x, ppform, left, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:,:)
     INTEGER, INTENT(in) :: left, jder
     DOUBLE PRECISION, INTENT(out) :: f(:,:)
     DOUBLE PRECISION :: h, fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h = x-sp%knots(left)
     f = 0.0d0
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f(:,:) = f(:,:)*h + ppform(j,:,:)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f(:,:) = f(:,:)*h + j*ppform(j+1,:,:)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f(:,:) = f(:,:)*j
        END DO
     END SELECT
   END SUBROUTINE ppval2
 !===========================================================================
   SUBROUTINE ppval2z(sp, x, ppform, left, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
 !
     TYPE(spline1d), INTENT(in) :: sp
     DOUBLE PRECISION, INTENT(in) :: x
     DOUBLE COMPLEX, INTENT(in) :: ppform(:,:,:)
     INTEGER, INTENT(in) :: left, jder
     DOUBLE COMPLEX, INTENT(out) :: f(:,:)
     DOUBLE PRECISION :: h, fact
     INTEGER :: j, order
 !
     order = sp%order   ! Polynomial degree p + 1
 !
     h = x-sp%knots(left)
     f = (0.0d0,0.0d0)
     IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
 !
     SELECT CASE (jder)
     CASE(0)            ! function value
        DO j=order,1,-1
           f(:,:) = f(:,:)*h + ppform(j,:,:)
        END DO
     CASE(1)            ! 1st derivative
        DO j=order-1,1,-1
           f(:,:) = f(:,:)*h + j*ppform(j+1,:,:)
        END DO
     CASE default       ! 2nd and higher derivatives
        fact = order-jder
        DO j=order,jder+1,-1
           f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:)
           fact = fact-1.0d0
        END DO
        DO j=2,jder
           f(:,:) = f(:,:)*j
        END DO
     END SELECT
   END SUBROUTINE ppval2z
 !===========================================================================
   SUBROUTINE locintv0_old(sp, x, left)
 !
 !   Locate the interval containing x
 !   Should be in [0, nints-1]
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: left
     DOUBLE PRECISION :: hinv
     INTEGER :: nints
 !
     nints = sp%nints
 !
 !  Case of equidistant mesh
     IF( sp%nlequid) THEN
        hinv = sp%hinv
        left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) 
        RETURN
     END IF
 !
 !  Non-equistant mesh
     left = sp%left
     DO
        IF( left .EQ. nints ) THEN 
           left = nints-1
           EXIT
        END IF
        IF( left .LT. 0 ) THEN 
           left = 0
           EXIT
        END IF
        IF( x .LT. sp%knots(left+1) ) THEN
           IF( x .GE. sp%knots(left) ) THEN
              EXIT
           ELSE
              left = left-1
           END IF
        ELSE
           left = left+1
        END IF
     END DO
     IF(left .GT. 0 .AND. left .LT. nints) THEN
        sp%left = left
     END IF
   END SUBROUTINE locintv0_old
 !
 !===========================================================================
   SUBROUTINE locintv1_old(sp, x, left)
 !
 !   Locate the intervals left(:) containing x(:)
 !   Should be in [0, nints-1]
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: x(:)
     INTEGER, INTENT(out) :: left(:)
     DOUBLE PRECISION :: hinv
     INTEGER :: nints, i
 !
 !  Case of equidistant mesh
     nints = sp%nints
     IF( sp%nlequid) THEN
        hinv = sp%hinv
        left(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nints-1))
        RETURN
     END IF
 !
 !  Non-equistant mesh
     DO i=1,SIZE(x)
        CALL locintv0_old(sp, x(i), left(i))
     END DO
   END SUBROUTINE locintv1_old
 !
 !===========================================================================
   SUBROUTINE locintv0(sp, x, left)
 !
 !   Locate the interval containing x
 !   Should be in [0, nints-1]
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: x
     INTEGER, INTENT(out) :: left
     DOUBLE PRECISION :: hinv
     INTEGER :: l, nf, nints
 !
     nints = sp%nints
 !
 !  Case of equidistant mesh
     IF( sp%nlequid) THEN
        hinv = sp%hinv
        left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) 
        RETURN
     END IF
 !
 !  Non-equistant mesh
     hinv = sp%hinv
     nf = SIZE(sp%fmap) - 1
     l = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nf-1))   ! left on fine mesh
     left = sp%fmap(l)
     IF( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1)
   END SUBROUTINE locintv0
 !
 !===========================================================================
   SUBROUTINE locintv1(sp, x, left)
 !
 !   Locate the intervals left(:) containing x(:)
 !   Should be in [0, nints-1]
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: x(:)
     INTEGER, INTENT(out) :: left(:)
     INTEGER :: l(SIZE(x))
     DOUBLE PRECISION :: hinv
     INTEGER :: npt, nf, nints, i
 !
     npt = SIZE(x)
 !
 !  Case of equidistant mesh
     nints = sp%nints
     IF( sp%nlequid) THEN
        hinv = sp%hinv
        left(1:npt) = MAX(0,MIN(FLOOR((x(1:npt)-sp%knots(0))*hinv),nints-1))
        RETURN
     END IF
 !
 !  Non-equistant mesh
     hinv = sp%hinv
     nf = SIZE(sp%fmap) - 1
     l(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nf-1))   ! left on fine mesh
     left(1:npt) = sp%fmap(l(1:npt))
     WHERE( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1)
   END SUBROUTINE locintv1
 !
 !===========================================================================
   SUBROUTINE destroy_sp1d(sp)
 !
 !  Clean up 1d spline object
 !
     TYPE(spline1d) :: sp
 !
     IF( ASSOCIATED(sp%knots) ) DEALLOCATE (sp%knots)
     IF( ASSOCIATED(sp%val0) ) DEALLOCATE (sp%val0)
     IF( ASSOCIATED(sp%valc) ) DEALLOCATE (sp%valc)
     IF( ASSOCIATED(sp%gausx) ) DEALLOCATE (sp%gausx)
     IF( ASSOCIATED(sp%gausw) ) DEALLOCATE (sp%gausw)
     IF( ASSOCIATED(sp%intspl) ) DEALLOCATE (sp%intspl)
     IF( ASSOCIATED(sp%ppform) ) DEALLOCATE (sp%ppform)
     IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz)
     IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
     IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
     IF( ASSOCIATED(sp%fmap) ) DEALLOCATE(sp%fmap)
 !
     CALL destroy(sp%mat)
     CALL destroy(sp%matp)
     CALL destroy_dftmap(sp%dft)
   END SUBROUTINE destroy_sp1d
 !
 !===========================================================================
   SUBROUTINE destroy_dftmap(m)
 !
 !   Clean up DFTMAP
 !
     TYPE(dftmap) :: m
 !
     IF(ASSOCIATED(m%coefs)) DEALLOCATE(m%coefs)
   END SUBROUTINE destroy_dftmap
 !===========================================================================
   SUBROUTINE destroy_sp2d(sp)
 !
 !  Clean up 2d spline object
 !
     TYPE(spline2d) :: sp
 !
     IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
     IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
     IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
     IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
     CALL destroy_sp1d(sp%sp1)
     CALL destroy_sp1d(sp%sp2)
   END SUBROUTINE destroy_sp2d
 !===========================================================================
   SUBROUTINE destroy_sp2d1d(sp)
 !
 !  Clean up 2d1d spline object
 !
     TYPE(spline2d1d) :: sp
 !
     IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
     IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz)
     IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
     IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
     CALL destroy_sp2d(sp%sp12)
     CALL destroy_sp1d(sp%sp3)
   END SUBROUTINE destroy_sp2d1d
 !
 !===========================================================================
   SUBROUTINE calc_integ0(sp, finteg)
 !
 !   Compute integral of splines
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(out) :: finteg(0:)
     DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:)
     DOUBLE PRECISION :: x1, x2
     INTEGER :: dim, nx, nidbas, ng, i, ig, j, jj, left
 !
     CALL get_dim(sp, dim, nx, nidbas)
     ng = MAX(2, (nidbas+2)/2)
     ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1))
     fun = 0.0d0
     finteg = 0.0d0
     DO i=1,nx             ! Loop thru the intervals
        left = i
        x1 = sp%knots(i-1)
        x2 = sp%knots(i)
        CALL gauleg(x1, x2, xg, wg, ng)
        DO ig=1,ng         ! Loop thru Gauss points
           CALL basfun(xg(ig), sp, fun, i)
           left = i-1
           DO j=0,nidbas   ! Loop thru the splines [left:left+nidbas]
              jj = left+j  ! in this interval
              IF( sp%period ) jj = MODULO(left+j, nx)
              finteg(jj) = finteg(jj) + wg(ig)*fun(j,1)
           END DO
        END DO
     END DO
     DEALLOCATE(xg, wg, fun)
   END SUBROUTINE calc_integ0
 !===========================================================================
   SUBROUTINE calc_integn(sp, finteg)
 !
 !   Compute integrals = Int( x^a \Lambda_j(x) ), a=0,1,... n
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(out) :: finteg(0:,0:)
     DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:)
     DOUBLE PRECISION :: x1, x2, xpow
     INTEGER :: dim, nx, nidbas, ng, i, ig, j, k, jj, left
     INTEGER :: nord
 !
     nord = SIZE(finteg,2)-1
     CALL get_dim(sp, dim, nx, nidbas)
     ng = MAX(2, (nidbas+nord+2)/2)
     ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1))
     fun = 0.0d0
     finteg = 0.0d0
     DO i=1,nx             ! Loop thru the intervals
        left = i
        x1 = sp%knots(i-1)
        x2 = sp%knots(i)
        CALL gauleg(x1, x2, xg, wg, ng)
        DO ig=1,ng         ! Loop thru Gauss points
           CALL basfun(xg(ig), sp, fun, i)
           left=i-1
           DO j=0,nidbas   ! Loop thru the splines [left:left+nidbas]
              jj = left+j  ! in this interval
              IF( sp%period ) jj = MODULO(left+j, nx)
              xpow = wg(ig)*fun(j,1)
              DO k=0,nord
                 finteg(jj,k) = finteg(jj,k) + xpow
                 xpow = xpow*xg(ig)
              END DO
           END DO
        END DO
     END DO
     DEALLOCATE(xg, wg, fun)
   END SUBROUTINE calc_integn
 !===========================================================================
 !
   DOUBLE PRECISION FUNCTION fintg1(sp, c)
 !
 !   Integral of 1d function from its spline coefs c.
 !
     TYPE(spline1d) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(0:)
     INTEGER :: dim
     dim = sp%dim
     fintg1 = DOT_PRODUCT(sp%intspl(0:dim-1), c(0:dim-1))
   END FUNCTION fintg1
 !===========================================================================
   DOUBLE PRECISION FUNCTION fintg2(sp, c)
 !
 !   Integral of 2d function from its spline coefs c.
 !
     TYPE(spline2d) :: sp
     DOUBLE PRECISION, INTENT(in) :: c(0:,0:)
     INTEGER :: dim1, dim2, i, j
     dim1 = sp%sp1%dim
     dim2 = sp%sp2%dim
     fintg2 = 0.0d0
     DO j=0,dim2-1
        DO i=0,dim1-1
           fintg2 = fintg2 + c(i,j)*sp%sp1%intspl(i)*sp%sp2%intspl(j)
        END DO
     END DO
   END FUNCTION fintg2
 !===========================================================================
   SUBROUTINE gridval2d_2d(sp, xp, yp, fp, jder, c, ppform)
 !
 !  Compute values or jder-th dervivative of f(x,y) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients c are given.
 !
 !              F(I,J) = F(X(I), Y(J))
 !
     TYPE(spline2d), INTENT(inout) :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(2)
     DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform
 !
     INTEGER :: d1, d2, k1, k2, n1, n2
     DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
     INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
     INTEGER :: i, j, k, ii, jj
     LOGICAL :: nlppform
 !
     d1 = sp%sp1%dim
     d2 = sp%sp2%dim
     k1 = sp%sp1%order
     k2 = sp%sp2%order
     n1 = sp%sp1%nints
     n2 = sp%sp2%nints
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !   Compute PPFORMM/BCOEFS if spline coefs are passed
 !
     IF( PRESENT(c) ) THEN
        IF( nlppform ) THEN
           ALLOCATE(work(d2,k1,n1))
           CALL topp1(sp%sp1, c , work)
           IF(PRESENT(ppform)) THEN
              CALL topp2(sp%sp2, work, ppform)
           ELSE
              IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
              ALLOCATE(sp%ppform(k1,n1,k2,n2))
              CALL topp2(sp%sp2, work, sp%ppform)
           END IF
           DEALLOCATE(work)
        ELSE
           IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
           ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2)))
           sp%bcoefs = c
        END IF
     END IF
 !
 !   Applly periodicity if required
 !
     IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
        x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
        y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
     ELSE
        y = yp
     END IF
 !
 !   Locate interval containing (x,y)
 !
     CALL locintv(sp%sp1, x, leftx)
     CALL locintv(sp%sp2, y, lefty)
 !
 !  Compute function/derivatives
 !
     IF( nlppform ) THEN   ! using PP form
        ALLOCATE(temp(k2))
        DO j=1,SIZE(y)
           DO i=1,SIZE(x)
              IF(PRESENT(ppform)) THEN
                 CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(j)+1),&
                      &     leftx(i), jder(1), temp)
              ELSE
                 CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(j)+1),&
                      &     leftx(i), jder(1), temp)
              END IF
              CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j))
           END DO
        END DO
        DEALLOCATE(temp)
     ELSE                   ! using spline expansion
        ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
        fp = 0.0d0
        DO j=1,SIZE(y)
           CALL basfun(y(j), sp%sp2, funy, lefty(j)+1)
           DO i=1,SIZE(x)
              CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
              DO jj=1,k2
                 DO ii=1,k1
                    fp(i,j) = fp(i,j) + sp%bcoefs(leftx(i)+ii,lefty(j)+jj) * &
                         &   funx(ii,jder(1))*funy(jj,jder(2))
                 END DO
              END DO
           END DO
        END DO
        DEALLOCATE(funx, funy)
     END IF
   END SUBROUTINE gridval2d_2d
 !===========================================================================
   SUBROUTINE gridval2d1d_3d(sp, xp, yp, zp, fp, jder, c, ppform)
 !
 !  Compute values or jder-th dervivative of f(x,y,z) from spline
 !  coefficients (nlppform=.false.) or ppform (nlppform=.true.)
 !
 !              F(I,J,K) = F(X(I), Y(J), Z(K))
 !
     TYPE(spline2d1d), TARGET :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp
     DOUBLE PRECISION, DIMENSION(:,:,:), INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(3)
     DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform
 !
     TYPE(spline2d), POINTER :: sp2
     DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp))
     INTEGER          :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp))
     INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3
     INTEGER :: ipx, ipy, ipz, k, ii, jj, kk
     LOGICAL :: nlppform
 !--------------------------------------------------------------------------------
 !                               1. Prologue
 !
     sp2 => sp%sp12
     d1 = sp2%sp1%dim
     d2 = sp2%sp2%dim
     d3 = sp%sp3%dim
     k1 = sp2%sp1%order
     k2 = sp2%sp2%order
     k3 = sp%sp3%order
     n1 = sp2%sp1%nints
     n2 = sp2%sp2%nints
     n3 = sp%sp3%nints
     nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform
 !
 !   Applly periodicity if required
     IF( sp2%sp1%period ) THEN
        x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp2%sp2%period ) THEN
        y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod)
     ELSE
        y = yp
     END IF
     IF( sp%sp3%period ) THEN
        z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod)
     ELSE
        z = zp
     END IF
 !
 !   Locate interval containing (x,y,z)
     CALL locintv(sp2%sp1, x, leftx)
     CALL locintv(sp2%sp2, y, lefty)
     CALL locintv(sp%sp3,  z, leftz)
 !--------------------------------------------------------------------------------
 !                               2. Using PPFORM
 !
     IF( nlppform ) THEN
 !
 !   Compute PPFORM from BCOEF
        IF( PRESENT(c) ) THEN
           ALLOCATE(work2(d3,k1,n1,k2,n2))
           ALLOCATE(work1(d2,d3,k1,n1))
           CALL topp2(sp2%sp1, c, work1)
           CALL topp3(sp2%sp2, work1, work2)
           DEALLOCATE(work1)
           IF( PRESENT(ppform) )THEN
              CALL topp4(sp%sp3, work2, ppform)
           ELSE
              IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
              ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3))
              CALL topp4(sp%sp3, work2, sp%ppform)
           END IF
           DEALLOCATE(work2)
        END IF
 !
 !   Compute function/derivatives
        ALLOCATE(temp1(k2,k3))
        ALLOCATE(temp2(k3))
        DO ipz=1,SIZE(z)
           DO ipy=1,SIZE(y)
              DO ipx=1,SIZE(x)
                 IF(PRESENT(ppform)) THEN
                    CALL ppval(sp2%sp1, x(ipx), &
                         &  ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),&
                         &  leftx(ipx), jder(1), temp1)
                 ELSE
                    CALL ppval(sp2%sp1, x(ipx), &
                         &  sp%ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),&
                         &  leftx(ipx), jder(1), temp1)
                 END IF
                 CALL ppval(sp2%sp2, y(ipy), temp1, lefty(ipy), jder(2), &
                      &     temp2)
                 CALL ppval(sp%sp3, z(ipz), temp2, leftz(ipz), jder(3), &
                      &     fp(ipx,ipy,ipz))
              END DO
           END DO
        END DO
        DEALLOCATE(temp1)
        DEALLOCATE(temp2)
 !--------------------------------------------------------------------------------
 !                               3. Using spline expansion
 !
     ELSE
        IF( PRESENT(c) ) THEN
           IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
           ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3)))
           sp%bcoefs = c
        END IF
 !
 !   Compute function/derivatives
        ALLOCATE(funx(1:k1,0:jder(1)))
        ALLOCATE(funy(1:k2,0:jder(2)))
        ALLOCATE(funz(1:k3,0:jder(3)))
        fp = 0.0d0
        DO ipz=1,SIZE(z)
           CALL basfun(z(ipz), sp%sp3, funz, leftz(ipz)+1)
           DO ipy=1,SIZE(y)
              CALL basfun(y(ipy), sp2%sp2, funy, lefty(ipy)+1)
              DO ipx=1,SIZE(x)
                 CALL basfun(x(ipx), sp2%sp1, funx, leftx(ipx)+1)
                 DO kk=1,k3
                    DO jj=1,k2
                       DO ii=1,k1
                          fp(ipx,ipy,ipz) = fp(ipx,ipy,ipz) + &
                            &  sp%bcoefs(leftx(ipx)+ii,lefty(ipy)+jj,leftz(ipz)+kk) * &
                            &  funx(ii,jder(1)) * funy(jj,jder(2)) * funz(kk,jder(3))
                       END DO
                    END DO
                 END DO
              END DO
           END DO
        END DO
        DEALLOCATE(funx, funy, funz)
     END IF
   END SUBROUTINE gridval2d1d_3d
 !===========================================================================
   SUBROUTINE gridval2d1d_1d(sp, xp, yp, zp, fp, jder, c, ppform)
 !
 !  Compute values or jder-th dervivative of f(x,y,z) from spline
 !  coefficients (nlppform=.false.) or ppform (nlppform=.true.)
 !
 !              F(I) = F(X(I),Y(I),Z(I))
 !
     TYPE(spline2d1d), TARGET :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(3)
     DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform
 !
     TYPE(spline2d), POINTER :: sp2
     DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp))
     INTEGER          :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp))
     INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3
     INTEGER :: np, ip, ii, jj, kk
     LOGICAL :: nlppform
 !--------------------------------------------------------------------------------
 !                               1. Prologue
 !
     sp2 => sp%sp12
     d1 = sp2%sp1%dim
     d2 = sp2%sp2%dim
     d3 = sp%sp3%dim
     k1 = sp2%sp1%order
     k2 = sp2%sp2%order
     k3 = sp%sp3%order
     n1 = sp2%sp1%nints
     n2 = sp2%sp2%nints
     n3 = sp%sp3%nints
     np = SIZE(xp)
     nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform
 !
 !   Applly periodicity if required
     IF( sp2%sp1%period ) THEN
        x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp2%sp2%period ) THEN
        y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod)
     ELSE
        y = yp
     END IF
     IF( sp%sp3%period ) THEN
        z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod)
     ELSE
        z = zp
     END IF
 !
 !   Locate interval containing (x,y,z)
     CALL locintv(sp2%sp1, x, leftx)
     CALL locintv(sp2%sp2, y, lefty)
     CALL locintv(sp%sp3,  z, leftz)
 !--------------------------------------------------------------------------------
 !                               2. Using PPFORM
 !
     IF( nlppform ) THEN
 !
 !   Compute PPFORM from BCOEF
        IF( PRESENT(c) ) THEN
           ALLOCATE(work2(d3,k1,n1,k2,n2))
           ALLOCATE(work1(d2,d3,k1,n1))
           CALL topp2(sp2%sp1, c, work1)
           CALL topp3(sp2%sp2, work1, work2)
           DEALLOCATE(work1)
           IF( PRESENT(ppform) )THEN
              CALL topp4(sp%sp3, work2, ppform)
           ELSE
              IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
              ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3))
              CALL topp4(sp%sp3, work2, sp%ppform)
           END IF
           DEALLOCATE(work2)
        END IF
 !
 !   Compute function/derivatives
        ALLOCATE(temp1(k2,k3))
        ALLOCATE(temp2(k3))
        DO ip=1,np
           IF(PRESENT(ppform)) THEN
              CALL ppval(sp2%sp1, x(ip), &
                   &  ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),&
                   &  leftx(ip), jder(1), temp1)
           ELSE
              CALL ppval(sp2%sp1, x(ip), &
                   &  sp%ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),&
                   &  leftx(ip), jder(1), temp1)
           END IF
           CALL ppval(sp2%sp2, y(ip), temp1, lefty(ip), jder(2), temp2)
           CALL ppval(sp%sp3, z(ip), temp2, leftz(ip), jder(3), fp(ip))
        END DO
        DEALLOCATE(temp1)
        DEALLOCATE(temp2)
 !--------------------------------------------------------------------------------
 !                               3. Using spline expansion
 !
     ELSE
        IF( PRESENT(c) ) THEN
           IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
           ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3)))
           sp%bcoefs = c
        END IF
 !
 !   Compute function/derivatives
        ALLOCATE(funx(1:k1,0:jder(1)))
        ALLOCATE(funy(1:k2,0:jder(2)))
        ALLOCATE(funz(1:k3,0:jder(3)))
        fp = 0.0d0
        DO ip=1,np
           CALL basfun(x(ip), sp2%sp1, funx, leftx(ip)+1)
           CALL basfun(y(ip), sp2%sp2, funy, lefty(ip)+1)
           CALL basfun(z(ip), sp%sp3,  funz, leftz(ip)+1)
           DO kk=1,k3
              DO jj=1,k2
                 DO ii=1,k1
                    fp(ip) = fp(ip) + &
                            &  sp%bcoefs(leftx(ip)+ii,lefty(ip)+jj,leftz(ip)+kk) * &
                            &  funx(ii,jder(1))*funy(jj,jder(2))*funz(kk,jder(3))
                 END DO
              END DO
           END DO
        END DO
        DEALLOCATE(funx, funy, funz)
     END IF
   END SUBROUTINE gridval2d1d_1d
 !===========================================================================
   SUBROUTINE gridval2d(sp, xp, yp, fp, jder, c, ppform)
 !
 !  Compute values or jder-th dervivative of f(x,y) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients c are given.
 !
 !              F = F(X, Y)
 !
     TYPE(spline2d), INTENT(inout) :: sp
     DOUBLE PRECISION, INTENT(in) :: xp, yp
     DOUBLE PRECISION, INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(2)
     DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform
 !
     INTEGER :: d1, d2, k1, k2, n1, n2
     DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION :: x, y
     INTEGER :: leftx, lefty
     INTEGER :: i, j, k, ii, jj
     LOGICAL :: nlppform
 !
     d1 = sp%sp1%dim
     d2 = sp%sp2%dim
     k1 = sp%sp1%order
     k2 = sp%sp2%order
     n1 = sp%sp1%nints
     n2 = sp%sp2%nints
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !   Compute PPFORM/BCOEFS if spline coefs are passed
 !
     IF( PRESENT(c)) THEN
        IF( nlppform ) THEN
           ALLOCATE(work(d2,k1,n1))
           CALL topp1(sp%sp1, c , work)
           IF(PRESENT(ppform)) THEN
              CALL topp2(sp%sp2, work, ppform)
           ELSE
              IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
              ALLOCATE(sp%ppform(k1,n1,k2,n2))
              CALL topp2(sp%sp2, work, sp%ppform)
           END IF
           DEALLOCATE(work)
        ELSE
           IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
           ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2)))
           sp%bcoefs = c
        END IF
     END IF
 !
 !   Applly periodicity if required
 !
     IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
        x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
        y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
     ELSE
        y = yp
     END IF
 !
 !  Locate the interval containing x, y
 !
     CALL locintv(sp%sp1, x, leftx)
     CALL locintv(sp%sp2, y, lefty)
 !
 !  Compute function/derivatives
 !
     IF( nlppform ) THEN   ! using PP form
        ALLOCATE(temp(k2))
        IF(PRESENT(ppform)) THEN
           CALL ppval(sp%sp1, x, ppform(:,leftx+1,:,lefty+1),&
                &  leftx, jder(1), temp)
        ELSE
           CALL ppval(sp%sp1, x, sp%ppform(:,leftx+1,:,lefty+1),&
                &  leftx, jder(1), temp)
        END IF
        CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp)
        DEALLOCATE(temp)
     ELSE                   ! using spline expansion
        ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
        fp = 0.0d0
        CALL basfun(x, sp%sp1, funx, leftx+1)
        CALL basfun(y, sp%sp2, funy, lefty+1)
        DO jj=1,k2
           DO ii=1,k1
              fp = fp + &
                   &   funy(jj,jder(2))*sp%bcoefs(leftx+ii,lefty+jj)* &
                   &   funx(ii,jder(1))
           END DO
        END DO
        DEALLOCATE(funx, funy)
     END IF
   END SUBROUTINE gridval2d
 !===========================================================================
   SUBROUTINE gridval2d_1d(sp, xp, yp, fp, jder, c, ppform)
 !
 !  Compute values or jder-th dervivative of f(x,y) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients c are given.
 !
 !              F(I) = F(X(I), Y(I))
 !
     TYPE(spline2d), INTENT(inout) :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(2)
     DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform
 !
     INTEGER :: d1, d2, k1, k2, n1, n2, np
     DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
     INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
     INTEGER :: i, j, k, ii, jj
     LOGICAL :: nlppform
 !
     d1 = sp%sp1%dim
     d2 = sp%sp2%dim
     k1 = sp%sp1%order
     k2 = sp%sp2%order
     n1 = sp%sp1%nints
     n2 = sp%sp2%nints
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !   Compute PPFORM/BCOEFS if spline coefs are passed
 !
     IF( PRESENT(c)) THEN
        IF( nlppform ) THEN
           ALLOCATE(work(d2,k1,n1))
           CALL topp1(sp%sp1, c , work)
           IF(PRESENT(ppform)) THEN
              CALL topp2(sp%sp2, work, ppform)
           ELSE
              IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
              ALLOCATE(sp%ppform(k1,n1,k2,n2))
              CALL topp2(sp%sp2, work, sp%ppform)
           END IF
           DEALLOCATE(work)
        ELSE
           IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
           ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2)))
           sp%bcoefs = c
        END IF
     END IF
 !
 !   Applly periodicity if required
 !
     np = SIZE(xp)
     IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
        x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
        y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
     ELSE
        y = yp
     END IF
 !
 !  Locate the interval containing x, y
 !
     CALL locintv(sp%sp1, x, leftx)
     CALL locintv(sp%sp2, y, lefty)
 !
 !  Compute function/derivatives
 !
     IF( nlppform ) THEN   ! using PP form
        ALLOCATE(temp(k2))
        DO i=1,np
           IF(PRESENT(ppform)) THEN
              CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(i)+1),&
                   &  leftx(i), jder(1), temp)
           ELSE
              CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1),&
                   &  leftx(i), jder(1), temp)
           END IF
           CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i))
        END DO
        DEALLOCATE(temp)
     ELSE                   ! using spline expansion
        ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
        fp = 0.0d0
        DO i=1,np
           CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
           CALL basfun(y(i), sp%sp2, funy, lefty(i)+1)
           DO jj=1,k2
              DO ii=1,k1
                 fp(i) = fp(i) + &
                    &   funy(jj,jder(2))*sp%bcoefs(leftx(i)+ii,lefty(i)+jj)* &
                    &   funx(ii,jder(1))
              END DO
           END DO
        END DO
        DEALLOCATE(funx, funy)
     END IF
   END SUBROUTINE gridval2d_1d
 !===========================================================================
   SUBROUTINE gridval2dz(sp, xp, yp, fp, jder, c, ppformz)
 !
 !  Compute values or jder-th dervivative of f(x,y) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients c are given.
 !
 !              F = F(X, Y)
 !
     TYPE(spline2d), INTENT(inout) :: sp
     DOUBLE PRECISION, INTENT(in) :: xp, yp
     DOUBLE COMPLEX, INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(2)
     DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz
 !
     INTEGER :: d1, d2, k1, k2, n1, n2
     DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION :: x, y
     INTEGER :: leftx, lefty
     INTEGER :: i, j, k, ii, jj
     LOGICAL :: nlppform
 !
     d1 = sp%sp1%dim
     d2 = sp%sp2%dim
     k1 = sp%sp1%order
     k2 = sp%sp2%order
     n1 = sp%sp1%nints
     n2 = sp%sp2%nints
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !   Compute PPFORM/BCOEFS if spline coefs are passed
 !
     IF( PRESENT(c)) THEN
        IF( nlppform ) THEN
           ALLOCATE(work(d2,k1,n1))
           CALL topp1z(sp%sp1, c , work)
           IF(PRESENT(ppformz)) THEN
              CALL topp2z(sp%sp2, work, ppformz)
           ELSE
              IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
              ALLOCATE(sp%ppformz(k1,n1,k2,n2))
              CALL topp2z(sp%sp2, work, sp%ppformz)
           END IF
           DEALLOCATE(work)
        ELSE
           IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
           ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2)))
           sp%bcoefsc = c
        END IF
     END IF
 !
 !   Applly periodicity if required
 !
     IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
        x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
        y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
     ELSE
        y = yp
     END IF
 !
 !  Locate the interval containing x, y
 !
     CALL locintv(sp%sp1, x, leftx)
     CALL locintv(sp%sp2, y, lefty)
 !
 !  Compute function/derivatives
 !
     IF( nlppform ) THEN   ! using PP form
        ALLOCATE(temp(k2))
        IF(PRESENT(ppformz)) THEN
           CALL ppval(sp%sp1, x, ppformz(:,leftx+1,:,lefty+1),&
                &  leftx, jder(1), temp)
        ELSE
           CALL ppval(sp%sp1, x, sp%ppformz(:,leftx+1,:,lefty+1),&
                &  leftx, jder(1), temp)
        END IF
        CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp)
        DEALLOCATE(temp)
     ELSE                   ! using spline expansion
        ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
        fp = (0.0d0,0.0d0)
        CALL basfun(x, sp%sp1, funx, leftx+1)
        CALL basfun(y, sp%sp2, funy, lefty+1)
        DO jj=1,k2
           DO ii=1,k1
              fp = fp + &
                   &   funy(jj,jder(2))*sp%bcoefsc(leftx+ii,lefty+jj)* &
                   &   funx(ii,jder(1))
           END DO
        END DO
        DEALLOCATE(funx, funy)
     END IF
   END SUBROUTINE gridval2dz
 !===========================================================================
   SUBROUTINE gridval2d_1dz(sp, xp, yp, fp, jder, c, ppformz)
 !
 !  Compute values or jder-th dervivative of f(x,y) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients c are given.
 !
 !              F(I) = F(X(I), Y(I))
 !
     TYPE(spline2d), INTENT(inout) :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(2)
     DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz
 !
     INTEGER :: d1, d2, k1, k2, n1, n2, np
     DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
     INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
     INTEGER :: i, j, k, ii, jj
     LOGICAL :: nlppform
 !
     d1 = sp%sp1%dim
     d2 = sp%sp2%dim
     k1 = sp%sp1%order
     k2 = sp%sp2%order
     n1 = sp%sp1%nints
     n2 = sp%sp2%nints
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !   Compute PPFORM/BCOEFS if spline coefs are passed
 !
     IF( PRESENT(c)) THEN
        IF( nlppform ) THEN
           ALLOCATE(work(d2,k1,n1))
           CALL topp1z(sp%sp1, c , work)
           IF(PRESENT(ppformz)) THEN
              CALL topp2z(sp%sp2, work, ppformz)
           ELSE
              IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
              ALLOCATE(sp%ppformz(k1,n1,k2,n2))
              CALL topp2z(sp%sp2, work, sp%ppformz)
           END IF
           DEALLOCATE(work)
        ELSE
           IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
           ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2)))
           sp%bcoefsc = c
        END IF
     END IF
 !
 !   Applly periodicity if required
 !
     np = SIZE(xp)
     IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
        x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
        y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
     ELSE
        y = yp
     END IF
 !
 !  Locate the interval containing x, y
 !
     CALL locintv(sp%sp1, x, leftx)
     CALL locintv(sp%sp2, y, lefty)
 !
 !  Compute function/derivatives
 !
     IF( nlppform ) THEN   ! using PP form
        ALLOCATE(temp(k2))
        DO i=1,np
           IF(PRESENT(ppformz)) THEN
              CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(i)+1),&
                   &  leftx(i), jder(1), temp)
           ELSE
              CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1),&
                   &  leftx(i), jder(1), temp)
           END IF
           CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i))
        END DO
        DEALLOCATE(temp)
     ELSE                   ! using spline expansion
        ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
        fp = (0.0d0,0.0d0)
        DO i=1,np
           CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
           CALL basfun(y(i), sp%sp2, funy, lefty(i)+1)
           DO jj=1,k2
              DO ii=1,k1
                 fp(i) = fp(i) + &
                    &   funy(jj,jder(2))*sp%bcoefsc(leftx(i)+ii,lefty(i)+jj)* &
                    &   funx(ii,jder(1))
              END DO
           END DO
        END DO
        DEALLOCATE(funx, funy)
     END IF
   END SUBROUTINE gridval2d_1dz
 !===========================================================================
   SUBROUTINE gridval2d_2dz(sp, xp, yp, fp, jder, c, ppformz)
 !
 !  Compute values or jder-th dervivative of f(x,y) from ppform 
 !  of spline sp. Recompute the ppform if the optional spline 
 !  coefficients c are given.
 !
 !              F(I,J) = F(X(I), Y(J))
 !
     TYPE(spline2d), INTENT(inout) :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(out) :: fp
     INTEGER, INTENT(in) :: jder(2)
     DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
     DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz
 !
     INTEGER :: d1, d2, k1, k2, n1, n2
     DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:)
     DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
     INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
     INTEGER :: i, j, k, ii, jj
     LOGICAL :: nlppform
 !
     d1 = sp%sp1%dim
     d2 = sp%sp2%dim
     k1 = sp%sp1%order
     k2 = sp%sp2%order
     n1 = sp%sp1%nints
     n2 = sp%sp2%nints
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !   Compute PPFORMM/BCOEFS if spline coefs are passed
 !
     IF( PRESENT(c) ) THEN
        IF( nlppform ) THEN
           ALLOCATE(work(d2,k1,n1))
           CALL topp1z(sp%sp1, c , work)
           IF(PRESENT(ppformz)) THEN
              CALL topp2z(sp%sp2, work, ppformz)
           ELSE
              IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
              ALLOCATE(sp%ppformz(k1,n1,k2,n2))
              CALL topp2z(sp%sp2, work, sp%ppformz)
           END IF
           DEALLOCATE(work)
        ELSE
           IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
           ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2)))
           sp%bcoefsc = c
        END IF
     END IF
 !
 !   Applly periodicity if required
 !
     IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
        x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
     ELSE
        x = xp
     END IF
     IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
        y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
     ELSE
        y = yp
     END IF
 !
 !   Locate interval containing (x,y)
 !
     CALL locintv(sp%sp1, x, leftx)
     CALL locintv(sp%sp2, y, lefty)
 !
 !  Compute function/derivatives
 !
     IF( nlppform ) THEN   ! using PP form
        ALLOCATE(temp(k2))
        DO j=1,SIZE(y)
           DO i=1,SIZE(x)
              IF(PRESENT(ppformz)) THEN
                 CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(j)+1),&
                      &     leftx(i), jder(1), temp)
              ELSE
                 CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(j)+1),&
                      &     leftx(i), jder(1), temp)
              END IF
              CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j))
           END DO
        END DO
        DEALLOCATE(temp)
     ELSE                   ! using spline expansion
        ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
        fp = 0.0d0
        DO j=1,SIZE(y)
           CALL basfun(y(j), sp%sp2, funy, lefty(j)+1)
           DO i=1,SIZE(x)
              CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
              DO jj=1,k2
                 DO ii=1,k1
                    fp(i,j) = fp(i,j) + sp%bcoefsc(leftx(i)+ii,lefty(j)+jj) * &
                         &   funx(ii,jder(1))*funy(jj,jder(2))
                 END DO
              END DO
           END DO
        END DO
        DEALLOCATE(funx, funy)
     END IF
   END SUBROUTINE gridval2d_2dz
 !===========================================================================
   SUBROUTINE calc_fftmass(spl, fftmat)
 !
 !  Compute FT of mass matrix for periodic spline on equidistant mesh
 !
     TYPE(spline1d)                :: spl
     DOUBLE PRECISION, INTENT(out) :: fftmat(0:)
 !
     INTEGER :: dim, nx, nidbas, ngauss
     DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
     DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun(:,:)
     INTEGER :: igauss, intv
 !
     CALL get_dim(spl, dim, nx, nidbas)
     CALL get_gauss(spl, ngauss)
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
     ALLOCATE(ft_fun(0:nx-1,1))
 !
 !  Integrate on first interval
     intv = 1
     CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
     fftmat = 0.0d0
     DO igauss=1,ngauss
        CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv)
        fftmat(:) = fftmat(:) + wgauss(igauss)*ft_fun(:,1)*CONJG(ft_fun(:,1))
     END DO
 !
     DEALLOCATE(ft_fun)
     DEALLOCATE(xgauss, wgauss)
   END SUBROUTINE calc_fftmass
 !===========================================================================
   SUBROUTINE calc_fftmass_old(spl, fftmat)
 !
 !  Compute FT of mass matrix for periodic spline on equidistant mesh
 !
     TYPE(spline1d) :: spl
     DOUBLE PRECISION, INTENT(out) :: fftmat(0:)
     INTEGER :: dim, nx, nidbas, ngauss
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:), arow(:)
     INTEGER :: i, j, k, igauss, intv
     DOUBLE PRECISION :: pi, arg0, arg
 !
     CALL get_dim(spl, dim, nx, nidbas)
     CALL get_gauss(spl, ngauss)
     ALLOCATE(fun(0:nidbas,1)) ! Spline
     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
     ALLOCATE(arow(0:nidbas))
 !
 !  Assemble the first row of the upper mass matrix
     arow = 0.0d0
     intv = 1   ! Get splines on Gauss points in first interval
     CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
     DO igauss=1,ngauss
        CALL basfun(xgauss(igauss), spl, fun, intv)
        DO i=0,nidbas
           DO j=0,nidbas-i
              arow(i)=arow(i)+fun(j,1)*fun(i+j,1)*wgauss(igauss)
           END DO
        END DO
     END DO
 !
 !  Fourier transform
     pi = 4.0d0*ATAN(1.0d0)
     arg0 = 2.0d0*pi/REAL(nx,8)
     DO k=0,nx-1
        fftmat(k) = arow(0)
        arg = k*arg0
        DO i=1,nidbas
           fftmat(k) = fftmat(k) + 2.0d0*arow(i)*COS(i*arg)
        END DO
     END DO
 !
     DEALLOCATE(arow)
     DEALLOCATE(fun)
     DEALLOCATE(xgauss, wgauss)
   END SUBROUTINE calc_fftmass_old
 !===========================================================================
   SUBROUTINE CompMassMatrix1(sp1, sp2, a, b, MassMatrix)
   
 ! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over
 ! interval [a, b]
   
     IMPLICIT NONE
   
     TYPE(spline1d), INTENT(IN) :: sp1, sp2
     DOUBLE PRECISION, INTENT(IN) :: a, b
     DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMatrix
     
     INTEGER :: ndim1, n1, nidbas1
     INTEGER :: ndim2, n2, nidbas2
     INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg 
     DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2
   
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(sp1, ndim1, n1, nidbas1)
     CALL get_dim(sp2, ndim2, n2, nidbas2)
     
 !    PRINT "('In CompMassMatrix1')"
 !    PRINT "('sp1: dim, #intervals, degree', I, I, I)", ndim1, n1, nidbas1 
 !    PRINT "('sp2: dim, #intervals, degree', I, I, I)", ndim2, n2, nidbas2 
     
     ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives)
   
     IF (sp1%period) THEN
        IF (sp2%period) THEN
           ALLOCATE(MassMatrix(n1, n2))
        ELSE
           ALLOCATE(MassMatrix(n1, ndim2))
        END IF
     ELSE
        IF (sp2%period) THEN
           ALLOCATE(MassMatrix(ndim1, n2))
        ELSE
           ALLOCATE(MassMatrix(ndim1, ndim2))
        END IF
     END IF
   
 !
 !   Gauss quadature
 !
     ALLOCATE(allknots(0:n1+n2+3))
     CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint)
     nint = nint-1
   
     ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0)
     ALLOCATE(xg(ngauss), wg(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     MassMatrix = 0.d0
     DO int = 1, nint
        ! Get gauss abscissas and weights for current interval
        CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss)
        DO ig = 1, ngauss
           CALL locintv(sp1, xg(ig), left1)
           CALL locintv(sp2, xg(ig), left2)
           CALL basfun(xg(ig), sp1, fun1, left1+1)
           CALL basfun(xg(ig), sp2, fun2, left2+1)
           DO k1 = 0, nidbas1
              IF (sp1%period) THEN
                 i1 = modulo(left1+1 + k1 -1, n1) +1
              ELSE
                 i1 = left1+1 + k1
              END IF
              
              DO k2 = 0, nidbas2
                 IF (sp2%period) THEN
                    j2 = modulo(left2+1 + k2 -1, n2) +1
                 ELSE
                    j2 = left2+1 + k2
                 END IF
                 
                 MassMatrix(i1, j2) = MassMatrix(i1, j2) + wg(ig)*fun1(k1, 1)*fun2(k2, 1)
              END DO
           END DO
        END DO
     END DO
   
 !===========================================================================
 !              3.0  Epilogue
 !
     DEALLOCATE(xg, wg)
     DEALLOCATE(fun1, fun2)
     DEALLOCATE(allknots)
     
   END SUBROUTINE CompMassMatrix1
 !===========================================================================
   SUBROUTINE CompMassMatrix_gb(sp1, sp2, a, b, MassMatrix)
   
 ! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over
 ! interval [a, b]
   
     IMPLICIT NONE
   
     TYPE(spline1d), INTENT(IN)   :: sp1, sp2
     DOUBLE PRECISION, INTENT(IN) :: a, b
     TYPE(gbmat)                  :: MassMatrix
     
     INTEGER :: ndim1, n1, nidbas1
     INTEGER :: ndim2, n2, nidbas2
     INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg 
     DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2
     DOUBLE PRECISION :: val  
     !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(sp1, ndim1, n1, nidbas1)
     CALL get_dim(sp2, ndim2, n2, nidbas2)
     ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     ALLOCATE(allknots(0:n1+n2+3))
     CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint)
     nint = nint-1
   
     ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0)
     ALLOCATE(xg(ngauss), wg(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO int = 1, nint
        ! Get gauss abscissas and weights for current interval
        CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss)
        DO ig = 1, ngauss
           CALL locintv(sp1, xg(ig), left1)
           CALL locintv(sp2, xg(ig), left2)
           CALL basfun(xg(ig), sp1, fun1, left1+1)
           CALL basfun(xg(ig), sp2, fun2, left2+1)
           DO k1 = 0, nidbas1
              IF (sp1%period) THEN
                 i1 = modulo(left1+1 + k1 -1, n1) +1
              ELSE
                 i1 = left1+1 + k1
              END IF
              
              DO k2 = 0, nidbas2
                 IF (sp2%period) THEN
                    j2 = modulo(left2+1 + k2 -1, n2) +1
                 ELSE
                    j2 = left2+1 + k2
                 END IF
                 val = wg(ig)*fun1(k1, 1)*fun2(k2, 1)
                 CALL updtmat(MassMatrix, i1, j2, val)
              END DO
           END DO
        END DO
     END DO
   
 !===========================================================================
 !              3.0  Epilogue
 !
     DEALLOCATE(xg, wg)
     DEALLOCATE(fun1, fun2)
     DEALLOCATE(allknots)
     
   END SUBROUTINE CompMassMatrix_gb
 !===========================================================================
   SUBROUTINE CompMassMatrix_zgb(sp1, sp2, a, b, MassMatrix)
   
 ! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over
 ! interval [a, b]
   
     IMPLICIT NONE
   
     TYPE(spline1d), INTENT(IN)   :: sp1, sp2
     DOUBLE PRECISION, INTENT(IN) :: a, b
     TYPE(zgbmat)                 :: MassMatrix
     
     INTEGER :: ndim1, n1, nidbas1
     INTEGER :: ndim2, n2, nidbas2
     INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg 
     DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2
     DOUBLE COMPLEX :: val  
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(sp1, ndim1, n1, nidbas1)
     CALL get_dim(sp2, ndim2, n2, nidbas2)
     ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     ALLOCATE(allknots(0:n1+n2+3))
     CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint)
     nint = nint-1
   
     ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0)
     ALLOCATE(xg(ngauss), wg(ngauss))
 !===========================================================================
 !              2.0 Assembly loop
 !
     DO int = 1, nint
        ! Get gauss abscissas and weights for current interval
        CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss)
        DO ig = 1, ngauss
           CALL locintv(sp1, xg(ig), left1)
           CALL locintv(sp2, xg(ig), left2)
           CALL basfun(xg(ig), sp1, fun1, left1+1)
           CALL basfun(xg(ig), sp2, fun2, left2+1)
           DO k1 = 0, nidbas1
              IF (sp1%period) THEN
                 i1 = modulo(left1+1 + k1 -1, n1) +1
              ELSE
                 i1 = left1+1 + k1
              END IF
              
              DO k2 = 0, nidbas2
                 IF (sp2%period) THEN
                    j2 = modulo(left2+1 + k2 -1, n2) +1
                 ELSE
                    j2 = left2+1 + k2
                 END IF
                 val = wg(ig)*fun1(k1, 1)*fun2(k2, 1)
                 CALL updtmat(MassMatrix, i1, j2, val)
              END DO
           END DO
        END DO
     END DO
   
 !===========================================================================
 !              3.0  Epilogue
 !
     DEALLOCATE(xg, wg)
     DEALLOCATE(fun1, fun2)
     DEALLOCATE(allknots)
     
   END SUBROUTINE CompMassMatrix_zgb
 !===========================================================================  
   SUBROUTINE sorted_merge(arr1, n1, arr2, n2, a, b, arrm, nm) 
   
     IMPLICIT NONE
   
 ! Peforms:
 ! 1) Merge of arrays arr1 & arr2 including boundary values a & b
 ! 2) Sorts the merged arrays keeping only values in [a, b]
 ! 3) Removes duplicates
   
     INTEGER, INTENT(IN)  :: n1, n2
     INTEGER, INTENT(OUT) :: nm
     
     DOUBLE PRECISION, INTENT(IN) :: a, b
     DOUBLE PRECISION, INTENT(IN) :: arr1(n1), arr2(n2)
     DOUBLE PRECISION, DIMENSION(*), INTENT(OUT) :: arrm
   
     INTEGER :: i, j
   
 ! Merge the two arrays including a & b
     nm = n1 + n2 + 2
     arrm(1:nm) = (/ a, arr1(1:n1), b, arr2(1:n2) /)
   
 ! Sort
     CALL sort(arrm, nm)
   
 ! Remove duplicates 
     j = 1
     DO i = 2, nm
        IF(arrm(i) .GT. arrm(j)) THEN
           j = j + 1
           arrm(j) = arrm(i)
        END IF
     END DO
     nm = j
   
 ! Remove values outside [a, b]
     j = 0
     DO i = 1, nm
        IF((arrm(i) .GE. a) .AND. (arrm(i) .LE. b)) THEN
           j = j + 1
           arrm(j) = arrm(i)
        END IF
     END DO
     nm = j
   
   END SUBROUTINE sorted_merge
 !===========================================================================
   SUBROUTINE sort(arr, n)
   
 ! Sorts array ARR of length N into ascending numerical order by the 
 ! Shell-Mezgar algorithm. 
 ! See Sec. 8.1 of Numerical Recipes
   
     IMPLICIT NONE
     
     INTEGER, INTENT(IN) :: n 
     DOUBLE PRECISION, DIMENSION(n), INTENT(INOUT) :: arr
    
     INTEGER :: nsort, is, i, j, l, m
     DOUBLE PRECISION :: tmp
     DOUBLE PRECISION, PARAMETER :: tiny = 1D-5
     
     nsort = INT(LOG(REAL(n, 8))/LOG(2.D0) + tiny)
   
     m = n
     DO is = 1, nsort
        m = m/2
        DO j = 1, n-m
           i = j
           DO 
              l = i+m
              IF (arr(l) .LT. arr(i)) THEN
                 tmp = arr(i)
                 arr(i) = arr(l)
                 arr(l) = tmp
                 i = i-m
                 IF (i .LT. 1) EXIT
              ELSE
                 EXIT
              END IF
           END DO
        END DO
     END DO
   
   END SUBROUTINE sort
 !===========================================================================
   LOGICAL FUNCTION is_equid(x, dev)
 !
 !   Check whether mesh is euidistant or not
 !
     DOUBLE PRECISION, INTENT(in)            :: x(0:)
     DOUBLE PRECISION, INTENT(out), OPTIONAL :: dev
 !
     DOUBLE PRECISION :: dx(SIZE(x)-1), dxmin, dxmax, dxaver, e
     DOUBLE PRECISION, PARAMETER :: tol=1.d-6
     INTEGER :: n, i
     n=SIZE(x)-1
     dx = (/ (x(i)-x(i-1),i=1,n) /)
     dxmin = MINVAL(dx)
     dxmax = MAXVAL(dx)
     dxaver = (x(n)-x(0))/REAL(n,8)
     e = (dxmax-dxmin)/dxaver
 !!$    e = (dxmax-dxmin)/(SUM(x)/REAL(n+1))
     is_equid = e.LT.tol
     IF(PRESENT(dev)) dev = e
   END FUNCTION is_equid
 !===========================================================================
   SUBROUTINE create_fine(cmesh, h, fmap)
 !
 !   Create a fine mesh from a coarse mesh and returns its mapping 
 !
     DOUBLE PRECISION, INTENT(in)  :: cmesh(0:)
     DOUBLE PRECISION, INTENT(out) :: h
     INTEGER, POINTER, INTENT(out) :: fmap(:)
 !
     DOUBLE PRECISION, ALLOCATABLE :: fmesh(:)
     DOUBLE PRECISION :: xlen, hmin
     INTEGER :: n, nfine, i, ic
 !
     n = SIZE(cmesh)-1
     xlen = cmesh(n)-cmesh(0)
 !
 !     Minimum interval size
     hmin = xlen
     DO i=1,n
        hmin = MIN(hmin, cmesh(i)-cmesh(i-1))
     END DO
 !
 !     Create the fine mesh
     nfine = CEILING(xlen/hmin) 
     h = xlen / REAL(nfine,8)
     ALLOCATE(fmap(0:nfine))
     ALLOCATE(fmesh(0:nfine))
     fmesh = cmesh(0) + (/ (i*h, i=0,nfine) /)
     fmesh(nfine) = cmesh(n)
 !
 !    Map fine to coarse mesh
     ic = 0
     fmap(0) = ic
     DO i=1,nfine-1
        DO
           IF(fmesh(i).GE.cmesh(ic+1)) THEN 
              ic = ic+1
           ELSE
              EXIT
           END IF
        END DO
        fmap(i) = ic
     END DO
     fmap(nfine) = n-1
 !
     DEALLOCATE(fmesh)
   END SUBROUTINE create_fine
 !===========================================================================
   SUBROUTINE getgradr(sp, xp, yp, f00, f10, f01)
 !
 !   Compute the function f00 and its derivatives
 !     f10 = d/dx f
 !     f01 = d/dy f
 !   assuming that its PPFORM/BCOEFSC was already computed!
 !
     TYPE(spline2d), INTENT(inout)      :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in)     :: xp, yp
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f00, f10, f01
 !
     INTEGER :: np
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
     INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
     INTEGER :: i, ip, ii, jj, nidbas(2)
     DOUBLE PRECISION :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order)
     DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:)
     DOUBLE PRECISION, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:)
     LOGICAL :: nlppform
 !
 !   Apply periodicity if required
 !
     np = SIZE(xp)
     nidbas(1) = sp%sp1%order-1
     nidbas(2) = sp%sp2%order-1
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !  Locate the interval containing x, y
 !
     CALL locintv(sp%sp1, xp, leftx)
     CALL locintv(sp%sp2, yp, lefty)
     x(:) = xp(:) - sp%sp1%knots(leftx(:))
     y(:) = yp(:) - sp%sp2%knots(lefty(:))
 !
 !  Compute function/derivatives
 !
     IF(nlppform) THEN
 !
 !    Using PPFORM
 !----------
        DO i=1,np
           CALL my_ppval1(nidbas(1), x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1), &
                & temp0(i,:), temp1(i,:))
        END DO
 !
        CALL my_ppval0(nidbas(2), y, temp0, 0, f00)
        CALL my_ppval0(nidbas(2), y, temp0, 1, f01)
        CALL my_ppval0(nidbas(2), y, temp1, 0, f10)
     ELSE
 !
 !    Using spline expansion with sp%bcoefsc
 !----------
        IF(.NOT.ALLOCATED(funx)) THEN
           ALLOCATE(funx(0:nidbas(1),0:1))  ! Spline and its first derivative
           ALLOCATE(funy(0:nidbas(2),0:1))
           ALLOCATE(ftemp0(0:nidbas(1)))
           ALLOCATE(ftemp1(0:nidbas(1)))
        END IF
 !
        DO ip=1,np
           CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx)
           CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy)
           DO ii=0,nidbas(1)
              ftemp0(ii) = (0.d0,0.d0) 
              ftemp1(ii) = (0.d0,0.d0) 
              DO jj=0,nidbas(2)
                 ftemp0(ii) = ftemp0(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0)
                 ftemp1(ii) = ftemp1(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1)
              END DO
           END DO
           f00(ip) = SUM(funx(:,0)*ftemp0(:))
           f01(ip) = SUM(funx(:,0)*ftemp1(:))
           f10(ip) = SUM(funx(:,1)*ftemp0(:))
        END DO
 !-----------
     END IF
   CONTAINS
 !+++
     SUBROUTINE my_ppval0(p, x, ppform, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !   for many points x(:)
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x(:)
       DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
       INTEGER, INTENT(in) :: jder
       DOUBLE PRECISION, INTENT(out) :: f(:)
       DOUBLE PRECISION :: fact
       INTEGER :: j
       SELECT CASE (jder)
       CASE(0)            ! function value
          SELECT CASE(p)
          CASE(1)
             f(:) = ppform(:,1) + x(:)*ppform(:,2)
          CASE(2)
             f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3))
 !!$         CASE(3)
 !!$            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4)))
          CASE(3:)
             f(:) = ppform(:,p+1)
             DO j=p,1,-1
                f(:) = f(:)*x(:) + ppform(:,j)
             END DO
          END SELECT
       CASE(1)            ! 1st derivative
          SELECT CASE(p)
          CASE(1)
             f(:) = ppform(:,2)
          CASE(2)
             f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3)
 !!$         CASE(3)
 !!$            f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4))
          CASE(3:)
             f(:) = p*ppform(:,p+1)
             DO j=p-1,1,-1
                f(:) = f(:)*x(:) + j*ppform(:,j+1)
             END DO
          END SELECT
       CASE default       ! 2nd and higher derivatives
          f(:) = ppform(:,p+1)
          fact = p-jder
          DO j=p,jder+1,-1
             f(:) = f(:)/fact*j*x(:) + ppform(:,j)
             fact = fact-1.0d0
          END DO
          DO j=2,jder
             f(:) = f(:)*j
          END DO
       END SELECT
     END SUBROUTINE my_ppval0
 !+++
     SUBROUTINE my_ppval1(p, x, ppform, f0, f1)
 !
 !   Compute function and first derivative from the PP representation
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x
       DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
       DOUBLE PRECISION, INTENT(out) :: f0(:)
       DOUBLE PRECISION, INTENT(out) :: f1(:)
       DOUBLE PRECISION :: fact
       INTEGER :: j
       SELECT CASE(p)
       CASE(1)
          f0(:) = ppform(1,:) + x*ppform(2,:)
          f1(:) = ppform(2,:)
       CASE(2)
          f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:))
          f1(:) = ppform(2,:) + x*2.d0*ppform(3,:)
       CASE(3)
          f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:)))
          f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:))
       CASE(4:)
          f0 = ppform(p+1,:)
          f1 = f0
          DO j=p,2,-1
             f0(:) = ppform(j,:) + x*f0(:)
             f1(:) = f0(:) + x*f1(:)
          END DO
          f0(:) = ppform(1,:) + x*f0(:)
       END SELECT
     END SUBROUTINE my_ppval1
 !+++
     SUBROUTINE my_splines(p, x, ppform, f)
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x
       DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p)
       DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1)
       INTEGER :: i
       DOUBLE PRECISION :: powerx(0:p)
       SELECT CASE(p)
       CASE(1)
          f(0,0) = ppform(0,0) + x*ppform(1,0)
          f(0,1) = ppform(1,0)
          f(1,0) = 1.0-f(0,0)
          f(1,1) = -f(0,1)
       CASE(2)
          f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0))
          f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0)
          f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1))
          f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1)
          f(2,0) = 1.0 - f(0,0) - f(1,0)
          f(2,1) = - f(0,1) - f(1,1)
       CASE(3)
          f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0)))
          f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0))
          f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1)))
          f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1))
          f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2)))
          f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2))
          f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0)
          f(3,1) = - f(0,1) - f(1,1) - f(2,1)
       CASE(4:)
          powerx(0) = 1.d0
          DO i=1,p
             powerx(i) = powerx(i-1)*x
          END DO
          DO i=0,p-1
             f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:))
          END DO
          f(p,0) = 1.d0 - SUM(f(0:p-1,0))
          f(p,1) = - SUM(f(0:p-1,1))
       END SELECT
     END SUBROUTINE my_splines
 !+++
   END SUBROUTINE getgradr
 !===========================================================================
   SUBROUTINE getgradz(sp, xp, yp, f00, f10, f01)
 !
 !   Compute the function f00 and its derivatives
 !     f10 = d/dx f
 !     f01 = d/dy f
 !   assuming that its PPFORM/BCOEFSC was already computed!
 !
     TYPE(spline2d), INTENT(inout)      :: sp
     DOUBLE PRECISION, DIMENSION(:), INTENT(in)     :: xp, yp
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f00, f10, f01
 !
     INTEGER :: np
     DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
     INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
     INTEGER :: i, ip, ii, jj, nidbas(2)
     DOUBLE COMPLEX :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order)
     DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:)
     DOUBLE COMPLEX, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:)
     LOGICAL :: nlppform
 !
 !   Apply periodicity if required
 !
     np = SIZE(xp)
     nidbas(1) = sp%sp1%order-1
     nidbas(2) = sp%sp2%order-1
     nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
 !
 !  Locate the interval containing x, y
 !
     CALL locintv(sp%sp1, xp, leftx)
     CALL locintv(sp%sp2, yp, lefty)
     x(:) = xp(:) - sp%sp1%knots(leftx(:))
     y(:) = yp(:) - sp%sp2%knots(lefty(:))
 !
 !  Compute function/derivatives
 !
     IF(nlppform) THEN
 !
 !    Using PPFORM
 !----------
        DO i=1,np
           CALL my_ppval1(nidbas(1), x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1), &
                & temp0(i,:), temp1(i,:))
        END DO
 !
        CALL my_ppval0(nidbas(2), y, temp0, 0, f00)
        CALL my_ppval0(nidbas(2), y, temp0, 1, f01)
        CALL my_ppval0(nidbas(2), y, temp1, 0, f10)
     ELSE
 !
 !    Using spline expansion with sp%bcoefsc
 !----------
        IF(.NOT.ALLOCATED(funx)) THEN
           ALLOCATE(funx(0:nidbas(1),0:1))  ! Spline and its first derivative
           ALLOCATE(funy(0:nidbas(2),0:1))
           ALLOCATE(ftemp0(0:nidbas(1)))
           ALLOCATE(ftemp1(0:nidbas(1)))
        END IF
 !
        DO ip=1,np
           CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx)
           CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy)
           DO ii=0,nidbas(1)
              ftemp0(ii) = (0.d0,0.d0) 
              ftemp1(ii) = (0.d0,0.d0) 
              DO jj=0,nidbas(2)
                 ftemp0(ii) = ftemp0(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0)
                 ftemp1(ii) = ftemp1(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1)
              END DO
           END DO
           f00(ip) = SUM(funx(:,0)*ftemp0(:))
           f01(ip) = SUM(funx(:,0)*ftemp1(:))
           f10(ip) = SUM(funx(:,1)*ftemp0(:))
        END DO
 !-----------
     END IF
   CONTAINS
 !+++
     SUBROUTINE my_ppval0(p, x, ppform, jder, f)
 !
 !   Compute function and derivatives from the PP representation
 !   for many points x(:)
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x(:)
       DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
       INTEGER, INTENT(in) :: jder
       DOUBLE COMPLEX, INTENT(out) :: f(:)
       DOUBLE PRECISION :: fact
       INTEGER :: j
       SELECT CASE (jder)
       CASE(0)            ! function value
          SELECT CASE(p)
          CASE(1)
             f(:) = ppform(:,1) + x(:)*ppform(:,2)
          CASE(2)
             f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3))
          CASE(3)
             f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4)))
          CASE(4:)
             DO j=p+1,1,-1
                f(:) = f(:)*x(:) + ppform(:,j)
             END DO
          END SELECT
       CASE(1)            ! 1st derivative
          SELECT CASE(p)
          CASE(1)
             f(:) = ppform(:,2)
          CASE(2)
             f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3)
          CASE(3)
             f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4))
          CASE(4:)
             DO j=p,1,-1
                f(:) = f(:)*x(:) + j*ppform(:,j+1)
             END DO
          END SELECT
       CASE default       ! 2nd and higher derivatives
          f(:) = ppform(:,p+1)
          fact = p-jder
          DO j=p,jder+1,-1
             f(:) = f(:)/fact*j*x(:) + ppform(:,j)
             fact = fact-1.0d0
          END DO
          DO j=2,jder
             f(:) = f(:)*j
          END DO
       END SELECT
     END SUBROUTINE my_ppval0
 !+++
     SUBROUTINE my_ppval1(p, x, ppform, f0, f1)
 !
 !   Compute function and first derivative from the PP representation
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x
       DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
       DOUBLE COMPLEX, INTENT(out) :: f0(:)
       DOUBLE COMPLEX, INTENT(out) :: f1(:)
       DOUBLE PRECISION :: fact
       INTEGER :: j
       SELECT CASE(p)
       CASE(1)
          f0(:) = ppform(1,:) + x*ppform(2,:)
          f1(:) = ppform(2,:)
       CASE(2)
          f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:))
          f1(:) = ppform(2,:) + x*2.d0*ppform(3,:)
       CASE(3)
          f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:)))
          f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:))
       CASE(4:)
          f0 = ppform(p+1,:)
          f1 = f0
          DO j=p,2,-1
             f0(:) = ppform(j,:) + x*f0(:)
             f1(:) = f0(:) + x*f1(:)
          END DO
          f0(:) = ppform(1,:) + x*f0(:)
       END SELECT
     END SUBROUTINE my_ppval1
 !+++
     SUBROUTINE my_splines(p, x, ppform, f)
       INTEGER, INTENT(in) :: p
       DOUBLE PRECISION, INTENT(in) :: x
       DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p)
       DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1)
       INTEGER :: i
       DOUBLE PRECISION :: powerx(0:p)
       SELECT CASE(p)
       CASE(1)
          f(0,0) = ppform(0,0) + x*ppform(1,0)
          f(0,1) = ppform(1,0)
          f(1,0) = 1.0-f(0,0)
          f(1,1) = -f(0,1)
       CASE(2)
          f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0))
          f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0)
          f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1))
          f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1)
          f(2,0) = 1.0 - f(0,0) - f(1,0)
          f(2,1) = - f(0,1) - f(1,1)
       CASE(3)
          f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0)))
          f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0))
          f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1)))
          f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1))
          f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2)))
          f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2))
          f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0)
          f(3,1) = - f(0,1) - f(1,1) - f(2,1)
       CASE(4:)
          powerx(0) = 1.d0
          DO i=1,p
             powerx(i) = powerx(i-1)*x
          END DO
          DO i=0,p-1
             f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:))
          END DO
          f(p,0) = 1.d0 - SUM(f(0:p-1,0))
          f(p,1) = - SUM(f(0:p-1,1))
       END SELECT
     END SUBROUTINE my_splines
 !+++
   END SUBROUTINE getgradz
 END MODULE bsplines
diff --git a/src/cds_mod.f90 b/src/cds_mod.f90
index b0ca9d6..79ab611 100644
--- a/src/cds_mod.f90
+++ b/src/cds_mod.f90
@@ -1,626 +1,626 @@
 !>
 !> @file cds_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE cds
 !
 !    CDSMAT: Implement sparse matrix using Compressed
 !            Diagonal Storage.
 !
 !    T.M. Tran, CRPP-EPFL
 !    November 2010
 !
   USE mumps_bsplines
   IMPLICIT NONE
 !
   TYPE cds_mat  ! Compressed Diagonal Storage
      INTEGER :: rank
      INTEGER :: kl, ku, ndiags
      INTEGER :: nterms, kmat
      INTEGER :: ny
      INTEGER, DIMENSION(:), POINTER            :: dists => NULL()
      DOUBLE PRECISION, DIMENSION(:), POINTER   :: rowv => NULL()
      DOUBLE PRECISION, DIMENSION(:), POINTER   :: colh => NULL()
      DOUBLE PRECISION, DIMENSION(:), POINTER   :: bal  => NULL()
      DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => NULL()
      TYPE(mumps_mat), ALLOCATABLE :: mumps
   END TYPE cds_mat
 !
 !--------------------------------------------------------------------------------
   INTERFACE init
      MODULE PROCEDURE init_cds_mat
   END INTERFACE init
   INTERFACE clear_mat
      MODULE PROCEDURE clear_cds_mat
   END INTERFACE clear_mat
   INTERFACE destroy
      MODULE PROCEDURE destroy_cds_mat
   END INTERFACE destroy
   INTERFACE updtmat
      MODULE PROCEDURE updt_cds
   END INTERFACE updtmat
   INTERFACE getele
      MODULE PROCEDURE getele_cds
   END INTERFACE getele
   INTERFACE putele
      MODULE PROCEDURE putele_cds
   END INTERFACE putele
   INTERFACE getcol
      MODULE PROCEDURE getcol_cds
   END INTERFACE getcol
   INTERFACE getrow
      MODULE PROCEDURE getrow_cds
   END INTERFACE getrow
   INTERFACE putcol
      MODULE PROCEDURE putcol_cds
   END INTERFACE putcol
   INTERFACE putrow
      MODULE PROCEDURE putrow_cds
   END INTERFACE putrow
   INTERFACE getdiag
      MODULE PROCEDURE getdiag_cds
   END INTERFACE getdiag
   INTERFACE vmx
      MODULE PROCEDURE vmx_cds, vmxn_cds
   END INTERFACE
   INTERFACE putmat
      MODULE PROCEDURE putmat_cds
   END INTERFACE
   INTERFACE  getmat
      MODULE PROCEDURE getmat_cds
   END INTERFACE
   INTERFACE flops
      MODULE PROCEDURE flops_cds
   END INTERFACE flops
   INTERFACE matnorm
      MODULE PROCEDURE matnorm_cds
   END INTERFACE matnorm
 !
 CONTAINS
  !===========================================================================
   SUBROUTINE init_cds_mat(rank, dists, nterms, mat, bw0, kmat)
 !
 !  Initialize a CDS matrix obtained for a 2d FE discretization
 !  using Splines of orders p(1) and p(2).
 !  Number first the 2nd (periodic) dimension.
 !
     INTEGER, INTENT(in)              :: rank
     INTEGER, ALLOCATABLE, INTENT(in) :: dists(:)
     INTEGER, INTENT(in)              :: nterms
     TYPE(cds_mat)                    :: mat
     INTEGER, OPTIONAL, INTENT(in)    :: bw0, kmat
 !
     INTEGER :: kl, ku
 !
     mat%rank = rank
     mat%nterms = nterms
     mat%ny = 0           ! Used for unicity condition in cyl. geometry.
     IF(PRESENT(kmat)) mat%kmat = kmat
 !
     kl = -LBOUND(dists,1)
     ku =  UBOUND(dists,1)
 !
     mat%kl = kl
     mat%ku = ku
     mat%ndiags = ku + kl + 1
     IF(ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists)
     ALLOCATE(mat%dists(-kl:ku))
     mat%dists = dists
 !
     IF(ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv)
     IF(ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh)
     IF(PRESENT(bw0)) THEN
        ALLOCATE(mat%rowv(bw0), mat%colh(bw0))
        mat%rowv = 0.0d0
        mat%colh = 0.0d0
     ELSE
        ALLOCATE(mat%rowv(0))
        ALLOCATE(mat%colh(0))
     END IF
 !
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(rank, -kl:ku))
     mat%val = 0.0d0
 !
     IF(ASSOCIATED(mat%bal)) DEALLOCATE(mat%bal)
     ALLOCATE(mat%bal(rank))
     mat%bal = 0.0d0
   END SUBROUTINE init_cds_mat
 !===========================================================================
   SUBROUTINE clear_cds_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(cds_mat) :: mat
 !
     mat%val = 0.0d0
   END SUBROUTINE clear_cds_mat
 !===========================================================================
   SUBROUTINE destroy_cds_mat(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(cds_mat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists)
     IF( ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv)
     IF( ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh)
   END SUBROUTINE destroy_cds_mat
 !===========================================================================
   SUBROUTINE updt_cds(mat, i, j, val)
 !
 !  Update element Aij into sparse CDS matrix
 !
     TYPE(cds_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: d, k
 !
     d = j-i
     DO k = -mat%kl, mat%ku
        IF( d .EQ. mat%dists(k) ) THEN
           mat%val(i,k) =  mat%val(i,k)+val
           RETURN
        END IF
     END DO
     WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
     WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists
     STOP '*** Abnormal EXIT in MODULE matrix ***'
   END SUBROUTINE updt_cds
 !===========================================================================
   SUBROUTINE getele_cds(mat, i, j, val)
 !
 !  Get element Aij of sparse CDS matrix
 !
     TYPE(cds_mat), INTENT(in)     :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: d, k
 !
     d = j-i
     DO k = -mat%kl, mat%ku
        IF( d .EQ. mat%dists(k) ) THEN
           val = mat%val(i,k)
           RETURN
        END IF
     END DO
     WRITE(*,'(a,2i6)') 'GETELE: i, j out of range ', i, j
     WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists
     STOP '*** Abnormal EXIT in MODULE matrix ***'
   END SUBROUTINE getele_cds
 !===========================================================================
   SUBROUTINE putele_cds(mat, i, j, val)
 !
 !  Update element Aij into sparse CDS matrix
 !
     TYPE(cds_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: d, k
 !
     d = j-i
     DO k = -mat%kl, mat%ku
        IF( d .EQ. mat%dists(k) ) THEN
           mat%val(i,k) =  val
           RETURN
        END IF
     END DO
     WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
     WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists
     STOP '*** Abnormal EXIT in MODULE matrix ***'
   END SUBROUTINE putele_cds
 !===========================================================================
   SUBROUTINE getcol_cds(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(cds_mat), INTENT(in)     :: mat
     INTEGER, INTENT(in)           :: j
     DOUBLE PRECISION, INTENT(out) :: arr(:)
     INTEGER :: n,i,  k
 !
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
     DO k=-mat%kl, mat%ku
        i = j-mat%dists(k)
        IF( i.GE.1 .AND. i.LE.n ) arr(i) = mat%val(i,k)
     END DO
   END SUBROUTINE getcol_cds
 !===========================================================================
   SUBROUTINE getrow_cds(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(cds_mat), INTENT(in)      :: mat
     INTEGER, INTENT(in)           :: i
     DOUBLE PRECISION, INTENT(out) :: arr(:)
     INTEGER :: n, j,  k
 !
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
     DO k=-mat%kl, mat%ku
        j = i+mat%dists(k)
        IF( j.GE.1 .AND. j.LE.n ) arr(j) = mat%val(i,k)
     END DO
   END SUBROUTINE getrow_cds
 !===========================================================================
   SUBROUTINE putcol_cds(mat, j, arr)
 !
 !   Put a column to matrix
 !
     TYPE(cds_mat)                :: mat
     INTEGER, INTENT(in)          :: j
     DOUBLE PRECISION, INTENT(in) :: arr(:)
     INTEGER :: n,i,  k
 !
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     DO k=-mat%kl, mat%ku
        i = j-mat%dists(k)
        IF( i.GE. 1 .AND. i.LE.n ) mat%val(i,k) = arr(i)
     END DO
   END SUBROUTINE putcol_cds
 !===========================================================================
   SUBROUTINE putrow_cds(mat, i, arr)
 !
 !   Put a row from matrix
 !
     TYPE(cds_mat)                :: mat
     INTEGER, INTENT(in)          :: i
     DOUBLE PRECISION, INTENT(in) :: arr(:)
     INTEGER :: n, j,  k
 !
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     DO k=-mat%kl, mat%ku
        j = i+mat%dists(k)
        IF( j.GE.1 .AND. j.LE.n )  mat%val(i,k) = arr(j)
     END DO
   END SUBROUTINE putrow_cds
 !===========================================================================
   SUBROUTINE getdiag_cds(mat, d)
 !
 !   Returns diagonal of matrix
 !
     TYPE(cds_mat)    :: mat
     DOUBLE PRECISION :: d(:)
     INTEGER          :: ny
 !
     d(:) = mat%val(:,0)
 !
 ! The extra row and column implied by periodic BC
 !!$    ny = mat%ny
 !!$    IF( ny .NE. 0 ) THEN
 !!$       d(ny) = mat%rowv(ny) + mat%colh(ny)
 !!$    END IF
 !!$    WRITE(*,'(a/(8(1pe12.3)))') 'd', d
   END SUBROUTINE getdiag_cds
 !===========================================================================
   FUNCTION vmx_cds(mat, xarr)
 !
 !   Return product mat*x
 !
     TYPE(cds_mat)                 :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: vmx_cds(SIZE(xarr))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0
     INTEGER :: m, bw0, ny, k, d, i, i1, i2
 !
     m = mat%rank
     bw0 = SIZE(mat%rowv)
     ny = mat%ny
     vmx_cds = 0.0d0
 !
     IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
        vmx_cds(ny:bw0) = mat%colh(ny:bw0)*xarr(ny)
        vmx_cds(ny) = vmx_cds(ny) + DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0))
     END IF
 !
 #ifdef MKL
     CALL mkl_ddiamv('n', m, m, alpha, 'g', mat%val, m, mat%dists, &
           &         mat%ndiags, xarr, beta, vmx_cds)
 #else
     DO k=-mat%kl,mat%ku
        d = mat%dists(k)
        i1 = MAX(1,1-d)
        i2 = MIN(mat%rank,mat%rank-d)
        DO i=i1,i2
           vmx_cds(i) = vmx_cds(i) + mat%val(i,k)*xarr(i+d)
        END DO
     END DO
 #endif
   END FUNCTION vmx_cds
 !===========================================================================
   FUNCTION vmxn_cds(mat, xarr)
 !
 !   Return product mat*x
 !
     TYPE(cds_mat)                 :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
     DOUBLE PRECISION              :: vmxn_cds(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0
     INTEGER :: m, nrhs, bw0, ny, k, d, i, j, i1, i2
 !
     m = mat%rank
     nrhs = SIZE(xarr,2)
     bw0 = SIZE(mat%rowv)
     ny = mat%ny
     vmxn_cds = 0.0d0
 !
     IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
        DO j=1,nrhs
           vmxn_cds(ny:bw0,j) =  mat%colh(ny:bw0)*xarr(ny,j)
           vmxn_cds(ny,j) = vmxn_cds(ny,j) + &
                &           DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0,j))
        END DO
     END IF
 !
 #ifdef MKL
     CALL mkl_ddiamm('n', m, nrhs, m, alpha, 'g', mat%val, m, &
          &          mat%dists, mat%ndiags, xarr, m, beta, vmxn_cds, m)
 #else
     DO k=-mat%kl,mat%ku
        d = mat%dists(k)
        i1 = MAX(1,1-d)
        i2 = MIN(mat%rank,mat%rank-d)
        DO j=1,nrhs
           DO i=i1,i2
              vmxn_cds(i,j) = vmxn_cds(i,j) + mat%val(i,k)*xarr(i+d,j)
           END DO
        END DO
     END DO
 #endif
   END FUNCTION vmxn_cds
 !===========================================================================
   SUBROUTINE getmat_cds(fid, label, mat)
 !
 !   Read in CDS matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                     :: fid
     CHARACTER(len=*), INTENT(in)            :: label
     TYPE(cds_mat)                           :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'KL', mat%kl)
     CALL getatt(fid, label, 'KU', mat%ku)
     CALL getatt(fid, label, 'NDIAGS', mat%ndiags)
     CALL getatt(fid, label, 'NTERMS', mat%nterms)
     CALL getatt(fid, label, 'KMAT', mat%kmat)
     CALL getatt(fid, label, 'NY', mat%ny)
     IF( ASSOCIATED(mat%dists) ) THEN
        CALL getarr(fid, TRIM(label)//'/dists', mat%dists)
     END IF
     IF(ASSOCIATED(mat%bal)) THEN
        CALL getarr(fid, TRIM(label)//'/bal', mat%bal)
     END IF
     CALL getarr(fid, TRIM(label)//'/vals', mat%val)
     IF(ASSOCIATED(mat%rowv)) THEN
        CALL getarr(fid, TRIM(label)//'/rowv', mat%rowv)
        CALL getarr(fid, TRIM(label)//'/colh', mat%colh)
     END IF
   END SUBROUTINE getmat_cds
 !===========================================================================
   SUBROUTINE putmat_cds(fid, label, mat, str)
 !
 !   Write CDS matrix in hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(cds_mat)                          :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'KL', mat%kl)
     CALL attach(fid, label, 'KU', mat%ku)
     CALL attach(fid, label, 'NDIAGS', mat%ndiags)
     CALL attach(fid, label, 'NTERMS', mat%nterms)
     CALL attach(fid, label, 'KMAT', mat%kmat)
     CALL attach(fid, label, 'NY', mat%ny)
     IF( ASSOCIATED(mat%dists) ) THEN
        CALL putarr(fid, TRIM(label)//'/dists', mat%dists)
     END IF
     IF(ASSOCIATED(mat%bal)) THEN
        CALL putarr(fid, TRIM(label)//'/bal', mat%bal)
     END IF
     CALL putarr(fid, TRIM(label)//'/vals', mat%val)
     IF(ASSOCIATED(mat%rowv)) THEN
        CALL putarr(fid, TRIM(label)//'/rowv', mat%rowv)
        CALL putarr(fid, TRIM(label)//'/colh', mat%colh)
     END IF
   END SUBROUTINE putmat_cds
 !===========================================================================
   FUNCTION flops_cds(mat, xarr, ny)
 !
 !   Return FLOPS in product mat*x
 !
     TYPE(cds_mat)                 :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: flops_cds
     INTEGER, OPTIONAL, INTENT(in) :: ny
 !
     INTEGER :: k, d, i, i1, i2
 !
     flops_cds = 0.0d0
     IF( PRESENT(ny) ) THEN  ! Contributions from unicity BC
        flops_cds = 4.0d0*(SIZE(mat%rowv)-ny+1)
     END IF
     DO k=-mat%kl,mat%ku
        d = mat%dists(k)
        i1 = MAX(1,1-d)
        i2 = MIN(mat%rank,mat%rank-d)
        flops_cds = flops_cds + 2.0d0*(i2-i1+1)
     END DO
   END FUNCTION flops_cds
 !===========================================================================
   SUBROUTINE cds2mumps(mat, mat_mumps)
 !
 !   Fill mumps structure (based on routine to_mumps_mat)
 !
     INCLUDE 'mpif.h'
     TYPE(cds_mat)   :: mat
     TYPE(mumps_mat) :: mat_mumps
 !
     INTEGER :: i, ii, i1, i2, j, k, rank, d, bw0, s, e
     INTEGER :: comm, ierr, nnz_loc
 !
     CALL init(mat%rank, mat%nterms, mat_mumps)
 !
     comm = mat_mumps%mumps_par%COMM
     mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
 !
 !    Compute nnz_loc
 !
     rank = mat_mumps%rank
     s = mat_mumps%istart
     e = mat_mumps%iend
 !
     nnz_loc=0
     DO k=-mat%kl,mat%ku
        d = mat%dists(k)
        i1 = MAX(s,1-d)
        i2 = MIN(e,rank-d)
        nnz_loc = nnz_loc + (i2-i1+1)
     END DO
 !
 !    Extra col and row from unicity conditions
 !
     bw0 = SIZE(mat%rowv)
     IF(bw0.GT.0) THEN
        IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN
           nnz_loc = nnz_loc + bw0-mat%ny ! rowh(ny+1:bw0) 
        END IF
        nnz_loc = nnz_loc + (MIN(bw0,e)-MAX(mat%ny,s)) ! colh(ny+1:bw0)
     END IF
 !
     mat_mumps%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat_mumps%nnz_start = mat_mumps%nnz_start + 1
     mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1
     mat_mumps%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     mat_mumps%mumps_par%N = rank
     mat_mumps%mumps_par%NZ_loc = nnz_loc
 !
 !     Construct MUMPS (IRN, JCN, A)
 !
     ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc))
     ALLOCATE(mat_mumps%mumps_par%JCN_loc(nnz_loc))
     ALLOCATE(mat_mumps%mumps_par%A_loc(nnz_loc))
 !
     ii=0
     DO k=-mat%kl,mat%ku
        d = mat%dists(k)
        i1 = MAX(s,1-d)
        i2 = MIN(e,rank-d)
        DO i=i1,i2
           ii = ii+1
           mat_mumps%mumps_par%IRN_loc(ii) = i
           mat_mumps%mumps_par%JCN_loc(ii) = i+d
           mat_mumps%mumps_par%A_loc(ii)   = mat%val(i,k)
        END DO
     END DO
 !
     IF(bw0.GT.0) THEN
        IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN
           DO j=mat%ny+1,bw0    ! rowh(ny+1:bw0)
              ii = ii+1
              mat_mumps%mumps_par%IRN_loc(ii) = mat%ny
              mat_mumps%mumps_par%JCN_loc(ii) = j
              mat_mumps%mumps_par%A_loc(ii)   = mat%rowv(j)
           END DO
        END IF
        DO i=MAX(mat%ny,s)+1,MIN(bw0,e) ! colh(ny+1:bw0)
           ii = ii+1
           mat_mumps%mumps_par%IRN_loc(ii) = i
           mat_mumps%mumps_par%JCN_loc(ii) = mat%ny
           mat_mumps%mumps_par%A_loc(ii)   = mat%colh(i)
        END DO
     END IF
 !    
     CALL destroy(mat_mumps%mat)
     NULLIFY(mat_mumps%mat)
   END SUBROUTINE cds2mumps
 !===========================================================================
   DOUBLE PRECISION FUNCTION matnorm_cds(mat, p)
 !
 !   Compute matrix norm
 !
     TYPE(cds_mat), INTENT(in)              :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
 !
     CHARACTER(len=4) :: norm_type
     INTEGER          :: i, j, k, d
     DOUBLE PRECISION :: temp(mat%rank)
 !
     norm_type = 'fro'
     IF(PRESENT(p)) norm_type = p
 !
     SELECT CASE (norm_type)
     CASE ('inf')
        DO i=1,mat%rank
           temp(i) = SUM(ABS(mat%val(i,:)))
        END DO
        matnorm_cds = MAXVAL(temp)
     CASE ('1')
        temp = 0.0d0
        DO k=-mat%kl,mat%ku
           d = mat%dists(k)
           DO i=MAX(1,1-d),MIN(mat%rank,mat%rank-d)
              temp(i+d) = temp(i+d) + ABS(mat%val(i,k))
           END DO
        END DO
        matnorm_cds = MAXVAL(temp)
     CASE('fro')
        matnorm_cds = SQRT(SUM(mat%val**2))
     END SELECT
   END FUNCTION matnorm_cds
 !===========================================================================
 !
 END MODULE cds
diff --git a/src/conmat.f90 b/src/conmat.f90
index 3a489e5..887994c 100644
--- a/src/conmat.f90
+++ b/src/conmat.f90
@@ -1,257 +1,257 @@
 !>
 !> @file conmat.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE conmat_mod
 !
 !    CONMAT: Matrix construction for FE discretization.
 !
 !    T.M. Tran, CRPP-EPFL
 !    November 2011
 !
   USE bsplines
   USE matrix
 #ifdef MKL
   USE pardiso_bsplines
 #endif
   IMPLICIT NONE
 !
   INTERFACE conrhs
      MODULE PROCEDURE conrhs_r, conrhs_z
   END INTERFACE conrhs
   INTERFACE conmat
      MODULE PROCEDURE conmat_1d_gb, conmat_1d_ge, conmat_1d_pb, conmat_1d_periodic, &
           &           conmat_1d_zgb, conmat_1d_zpb, conmat_1d_zperiodic, &
           &           conmat_gb, conmat_pb, &
           &           conmat_zgb, conmat_zpb
   END INTERFACE conmat
 #ifdef MKL
   INTERFACE conmat
      MODULE PROCEDURE conmat_1d_pardiso,  conmat_1d_zpardiso, &
           &            conmat_pardiso, conmat_zpardiso
   END INTERFACE conmat
 #endif
 !
 CONTAINS
 !===========================================================================
   SUBROUTINE conmat_1d_gb(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(gbmat)                :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'conmat_1d.tpl'
   END SUBROUTINE conmat_1d_gb
 !===========================================================================
   SUBROUTINE conmat_1d_ge(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(gemat)                :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'conmat_1d.tpl'
   END SUBROUTINE conmat_1d_ge
 !===========================================================================
   SUBROUTINE conmat_1d_pb(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(pbmat)                :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'conmat_1d.tpl'
   END SUBROUTINE conmat_1d_pb
 !===========================================================================
   SUBROUTINE conmat_1d_periodic(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(periodic_mat)         :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'conmat_1d.tpl'
   END SUBROUTINE conmat_1d_periodic
 !===========================================================================
   SUBROUTINE conmat_1d_zgb(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(zgbmat)               :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat_1d.tpl'
   END SUBROUTINE conmat_1d_zgb
 !===========================================================================
   SUBROUTINE conmat_1d_zpb(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(zpbmat)               :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat_1d.tpl'
   END SUBROUTINE conmat_1d_zpb
 !===========================================================================
   SUBROUTINE conmat_1d_zperiodic(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(zperiodic_mat)        :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat_1d.tpl'
   END SUBROUTINE conmat_1d_zperiodic
 !===========================================================================
   SUBROUTINE conmat_gb(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     TYPE(gbmat)                :: mat
     TYPE(spline2d), INTENT(in) :: spl
 !
     INCLUDE 'conmat.tpl'
   END SUBROUTINE conmat_gb
 !===========================================================================
   SUBROUTINE conmat_pb(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     TYPE(pbmat)                :: mat
     TYPE(spline2d), INTENT(in) :: spl
 !
     INCLUDE 'conmat.tpl'
   END SUBROUTINE conmat_pb
 !===========================================================================
   SUBROUTINE conmat_zgb(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     TYPE(zgbmat)               :: mat
     TYPE(spline2d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat.tpl'
   END SUBROUTINE conmat_zgb
 !===========================================================================
   SUBROUTINE conmat_zpb(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     TYPE(zpbmat)               :: mat
     TYPE(spline2d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat.tpl'
   END SUBROUTINE conmat_zpb
 !===========================================================================
   SUBROUTINE conrhs_r(spl, farr, frhs)
 !
 !  Projection of RHS on spline basis functions
 !
     TYPE(spline1d)                :: spl
     DOUBLE PRECISION, INTENT(out) :: farr(:)
     INTERFACE
        DOUBLE PRECISION FUNCTION frhs(x)
          DOUBLE PRECISION, INTENT(in) :: x
        END FUNCTION frhs
     END INTERFACE
     DOUBLE PRECISION :: contrib
 !
     INCLUDE 'conrhs.tpl'
   END SUBROUTINE conrhs_r
 !===========================================================================
   SUBROUTINE conrhs_z(spl, farr, frhs)
 !
 !  Projection of RHS on spline basis functions
 !
     TYPE(spline1d)              :: spl
     DOUBLE COMPLEX, INTENT(out) :: farr(:)
     INTERFACE
        DOUBLE COMPLEX FUNCTION frhs(x)
          DOUBLE PRECISION, INTENT(in) :: x
        END FUNCTION frhs
     END INTERFACE
     DOUBLE COMPLEX :: contrib
 !
     INCLUDE 'conrhs.tpl'
   END SUBROUTINE conrhs_z
 !===========================================================================
 #ifdef MKL
   SUBROUTINE conmat_1d_pardiso(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(pardiso_mat)          :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'conmat_1d.tpl'
   END SUBROUTINE conmat_1d_pardiso
 !===========================================================================
   SUBROUTINE conmat_1d_zpardiso(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     TYPE(zpardiso_mat)         :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat_1d.tpl'
   END SUBROUTINE conmat_1d_zpardiso
 !===========================================================================
   SUBROUTINE conmat_pardiso(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     TYPE(pardiso_mat)          :: mat
     TYPE(spline2d), INTENT(in) :: spl
 !
     INCLUDE 'conmat.tpl'
   END SUBROUTINE conmat_pardiso
 !===========================================================================
   SUBROUTINE conmat_zpardiso(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     TYPE(zpardiso_mat)         :: mat
     TYPE(spline2d), INTENT(in) :: spl
 !
     INCLUDE 'zconmat.tpl'
   END SUBROUTINE conmat_zpardiso
 !===========================================================================
 #endif
 END MODULE conmat_mod
diff --git a/src/conmat.tpl b/src/conmat.tpl
index 09f96c6..f3d7ccb 100644
--- a/src/conmat.tpl
+++ b/src/conmat.tpl
@@ -1,213 +1,213 @@
 !>
 !> @file conmat.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !   In this version s[lines are precalculted
 !   (on all n1/n2 intervals
 !
   INTERFACE
      SUBROUTINE coefeq(x, y, idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x, y
        INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
        DOUBLE PRECISION, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
   END INTERFACE
   INTEGER, OPTIONAL :: maxder(2)   ! maximum oder of derivatives
   LOGICAL, OPTIONAL :: nat_order   ! Natural ordering for 2d-1d mapping
 !
   INTEGER :: n1, nidbas1, ndim1, n1e
   INTEGER :: n2, nidbas2, ndim2, n2e
   INTEGER :: ng1, ng2
   INTEGER :: i1, i2, ig1, ig2
   INTEGER :: igt1, igt2, igw1, igw2, irow, jcol
   INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !
   LOGICAL :: nlper1, nlper2, nlnat
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER :: k, kmaxder, it1, iw1, it2, iw2
   INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)  ! Terms in weak form
 !
   DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
   nlper1 = spl%sp1%period
   nlper2 = spl%sp2%period
 !
   n1e = n1+nidbas1  ! Number of elements in 1st coordinate
   n2e = n2+nidbas2  ! Number of elements in 2nd coordinate
   iF(nlper2) n2e = n2
 !
 !   Gauss points and weights on all intervals
 !
   xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
   wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
   ng1 = SIZE(xg1,1)
   xg2 => spl%sp2%gausx
   wg2 => spl%sp2%gausw
   ng2 = SIZE(xg2,1)
 !
 !   Splines on all intervals
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder(1)
   ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
   ALLOCATE(left1(ng1))
   DO i1=1,n1
      left1 = i1
      CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1)
   END DO
   DEALLOCATE(left1)
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder(2)
   ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2))
   ALLOCATE(left2(ng2))
   DO i2=1,n2
      left2 = i2
      CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2)
   END DO
   DEALLOCATE(left2)
 !
 !   Ordering in local to global matrix mapping
 !
   nlnat = .FALSE.
   IF(PRESENT(nat_order)) nlnat = nat_order
 !===========================================================================
 !              2.0 Assembly loop
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms,2))
   ALLOCATE(iderw(kterms,2))
   ALLOCATE(coefs(kterms,ng1,ng2))
 !
 !   Allocate local matrices
 !
   ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2))
   ALLOCATE(matc(ng1,ng2))
   ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2))
   ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
   ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2))
 !
   DO i1=1,n1
      DO i2=1,n2
 !
 !   Coefficients of the weak form
 !
         DO ig1=1,ng1
            DO ig2=1,ng2
               CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), &
                    &      idert, iderw, coefs(:,ig1,ig2))
            END DO
         END DO
 !
 !   Compute local matrix: A <- E*(C*D^T) + A
 !
         mata = 0.0d0
         DO k=1,kterms
 !
            matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2)
 !
            DO it1=0,nidbas1
               DO iw1=0,nidbas1
                  DO ig1=1,ng1
                     matf(it1,iw1,ig1) = wg1(ig1,i1) * &
                          &  fun1(it1,idert(k,1),ig1,i1) * &
                          &  fun1(iw1,iderw(k,1),ig1,i1)
                  END DO
               END DO
            END DO
 !
            DO it2=0,nidbas2
               DO iw2=0,nidbas2
                  DO ig2=1,ng2
                     matg(it2,iw2,ig2) = wg2(ig2,i2) * &
                          &  fun2(it2,idert(k,2),ig2,i2) * &
                          &  fun2(iw2,iderw(k,2),ig2,i2)
                  END DO
               END DO
            END DO
 !
            CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, &
                 &      matc, ng1, matg, (nidbas2+1)*(nidbas2+1), 0.0d0, &
                 &      matcg, ng1)
            CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), &
                 &      ng1, 1.0d0, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, 1.0d0, &
                 &      mata, (nidbas1+1)*(nidbas1+1))
 !
         END DO
 !
 !   Map local matrix A to global matrix
 !
         DO it1=0,nidbas1
            igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1
            DO it2=0,nidbas2
               igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1
               irow = glmap(igt1, igt2, n1e, n2e)
               DO iw1=0,nidbas1
                  igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1
                  DO iw2=0,nidbas2
                     igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1
                     jcol = glmap(igw1, igw2, n1e, n2e)
                     CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2))
                  END DO
               END DO
            END DO
         END DO
 !
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun1)
   DEALLOCATE(fun2)
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(mata)
   DEALLOCATE(matc)
   DEALLOCATE(matg)
   DEALLOCATE(matcg)
   DEALLOCATE(matf)
 !
 CONTAINS
   INTEGER FUNCTION glmap(i,j,n1,n2)
     INTEGER, INTENT(in) :: i,j,n1,n2
     IF(nlnat) THEN
        glmap = (j-1)*n1 + i
     ELSE
        glmap = (i-1)*n2 + j
     END IF
   END FUNCTION glmap
diff --git a/src/conmat2.tpl b/src/conmat2.tpl
index 27f7e4d..e5b0f64 100644
--- a/src/conmat2.tpl
+++ b/src/conmat2.tpl
@@ -1,202 +1,202 @@
 !>
 !> @file conmat2.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !   In this version local matrices E and D are precalculted
 !   (on all n1/n2 intervals and nterms weak-form terms
 !
   INTERFACE
      SUBROUTINE coefeq(x, y, idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x, y
        INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
        DOUBLE PRECISION, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
   END INTERFACE
   INTEGER, OPTIONAL :: maxder(2)   ! maximum oder of derivatives
 !
   INTEGER :: n1, nidbas1, ndim1
   INTEGER :: n2, nidbas2, ndim2
   INTEGER :: ng1, ng2
   INTEGER :: i1, i2, ig1, ig2
   INTEGER :: igt1, igt2, igw1, igw2, irow, jcol
   INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !
   LOGICAL :: nlper1, nlper2
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER :: k, kmaxder, it1, iw1, it2, iw2
   INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)  ! Terms in weak form
   DOUBLE PRECISION :: dummy(mat%nterms)
 !
   DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:), fun2(:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:), matcd(:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: matd(:,:,:,:,:), mate(:,:,:,:,:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
   nlper1 = spl%sp1%period
   nlper2 = spl%sp2%period
 !
 !   Gauss points and weights on all intervals
 !
   xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
   wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
   ng1 = SIZE(xg1,1)
   xg2 => spl%sp2%gausx
   wg2 => spl%sp2%gausw
   ng2 = SIZE(xg2,1)
 !
 !   Derivative orders in the weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms,2))
   ALLOCATE(iderw(kterms,2))
   CALL coefeq(xg1(1,1), xg2(1,1), idert, iderw, dummy)
 !
 !   Precalc matrix E  in dimension 1
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder(1)
   ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1))
   ALLOCATE(left1(ng1))
   ALLOCATE(mate(0:nidbas1,0:nidbas1,ng1,kterms,n1))
   DO i1=1,n1
      left1 = i1
      CALL basfun(xg1(:,i1), spl%sp1, fun1, left1)
      DO k=1,kterms
         DO ig1=1,ng1
            DO iw1=0,nidbas1
               DO it1=0,nidbas1
                  mate(it1,iw1,ig1,k,i1) = wg1(ig1,i1) * &
                       &  fun1(it1,idert(k,1),ig1) * &
                       &  fun1(iw1,iderw(k,1),ig1)
               END DO
            END DO
         END DO
      END DO
   END DO
   DEALLOCATE(fun1)
   DEALLOCATE(left1)
 !
 !   Precalc matrix D  in dimension 2
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder(2)
   ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2))
   ALLOCATE(left2(ng2))
   ALLOCATE(matd(0:nidbas2,0:nidbas2,ng2,kterms,n2))
   DO i2=1,n2
      left2 = i2
      CALL basfun(xg2(:,i2), spl%sp2, fun2, left2)
      DO k=1,kterms
         DO ig2=1,ng2
            DO iw2=0,nidbas2
               DO it2=0,nidbas2
                  matd(it2,iw2,ig2,k,i2) = wg2(ig2,i2) * &
                       &  fun2(it2,idert(k,2),ig2) * &
                       &  fun2(iw2,iderw(k,2),ig2)
               END DO
            END DO
         END DO
      END DO
   END DO
   DEALLOCATE(fun2)
   DEALLOCATE(left2)
 !===========================================================================
 !              2.0 Assembly loop
 !
 !   Physical coefficients in Weak form
 !
   ALLOCATE(coefs(kterms,ng1,ng2))
   ALLOCATE(matc(ng1,ng2))
 !
 !   Allocate local matrix A
 !
   ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2))
   ALLOCATE(matcd(ng1,0:nidbas2,0:nidbas2))
 !
   DO i1=1,n1
      DO i2=1,n2
 !
 !   Coefficients of the weak form
 !
         DO ig1=1,ng1
            DO ig2=1,ng2
               CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), &
                    &      idert, iderw, coefs(:,ig1,ig2))
            END DO
         END DO
 !
 !   Compute local matrix: A <- E*(C*D^T) + A
 !
         mata = 0.0d0
         DO k=1,kterms
 !
            matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2)
 !
            CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, &
                 &      matc, ng1, matd(0,0,1,k,i2), (nidbas2+1)*(nidbas2+1), 0.0d0, &
                 &      matcd, ng1)
            CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), &
                 &      ng1, 1.0d0, mate(0,0,1,k,i1), (nidbas1+1)*(nidbas1+1), matcd, ng1, 1.0d0, &
                 &      mata, (nidbas1+1)*(nidbas1+1))
 !
         END DO
 !
 !   Map local matrix A to global matrix
 !
         DO it1=0,nidbas1
            igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1
            DO it2=0,nidbas2
               igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1
               irow = igt2 + (igt1-1)*n2
               DO iw1=0,nidbas1
                  igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1
                  DO iw2=0,nidbas2
                     igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1
                     jcol = igw2 + (igw1-1)*n2
                     CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2))
                  END DO
               END DO
            END DO
         END DO
 !
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(mata)
   DEALLOCATE(matc)
   DEALLOCATE(matd)
   DEALLOCATE(matcd)
   DEALLOCATE(mate)
diff --git a/src/conmat_1d.tpl b/src/conmat_1d.tpl
index ce06d1d..282c9a8 100644
--- a/src/conmat_1d.tpl
+++ b/src/conmat_1d.tpl
@@ -1,156 +1,156 @@
 !>
 !> @file conmat_1d.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !   In this version s[lines are precalculted
 !   (on all n1/n2 intervals
 !
   INTERFACE
      SUBROUTINE coefeq(x, idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x
        INTEGER, INTENT(out) :: idt(:), idw(:)
        DOUBLE PRECISION, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
   END INTERFACE
   INTEGER, OPTIONAL :: maxder   ! maximum oder of derivatives
 !
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: i1, ig1
   INTEGER :: irow, jcol
   INTEGER, ALLOCATABLE :: left1(:)
 !
   LOGICAL :: nlper1
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER :: k, kmaxder, it1, iw1
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:)  ! Terms in weak form
 !
   DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
   DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), matc(:)
   DOUBLE PRECISION, ALLOCATABLE :: matf(:,:,:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, ndim1, n1, nidbas1)
   nlper1 = spl%period
 !
 !   Gauss points and weights on all intervals
 !
   xg1 => spl%gausx  ! xg1(ng1,n1)
   wg1 => spl%gausw  ! wg1(ng1,n1)
   ng1 = SIZE(xg1,1)
 !
 !   Splines on all intervals
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder
   ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
   ALLOCATE(left1(ng1))
   DO i1=1,n1
      left1 = i1
 !!$     DO ig1=1,ng1
 !!$        CALL basfun(xg1(ig1,i1), spl, fun1(:,:,ig1,i1), left1(ig1))
 !!$     END DO
      CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1)
   END DO
   DEALLOCATE(left1)
 !===========================================================================
 !              2.0 Assembly loop
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms))
   ALLOCATE(iderw(kterms))
   ALLOCATE(coefs(kterms,ng1))
 !
 !   Allocate local matrices
 !
   ALLOCATE(mata(0:nidbas1,0:nidbas1))
   ALLOCATE(matc(ng1))
   ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
 !
   DO i1=1,n1
 !
 !   Coefficients of the weak form
 !
      DO ig1=1,ng1
         CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1))
      END DO
 !
 !   Compute local matrix: A <- F*c + A
 !
      mata = 0.0d0
      DO k=1,kterms
 !
         matc(1:ng1) = coefs(k,1:ng1)
 !
         DO it1=0,nidbas1
            DO iw1=0,nidbas1
               DO ig1=1,ng1
                  matf(it1,iw1,ig1) = wg1(ig1,i1) * &
                       &  fun1(it1,idert(k),ig1,i1) * &
                       &  fun1(iw1,iderw(k),ig1,i1)
               END DO
            END DO
         END DO
 !
         CALL dgemv('N', (nidbas1+1)*(nidbas1+1), ng1, 1.0d0, matf, &
              &          (nidbas1+1)*(nidbas1+1), matc, 1, 1.0d0, mata, 1)
      END DO
 !
 !   Map local matrix A to global matrix
 !
 !!$     WRITE(*,'(/a,i3)') "Lambda, i =", i1
 !!$     DO ig1=1,ng1
 !!$        WRITE(*,'(10(1pe12.3))') fun1(:,0,ig1,i1)
 !!$     END DO
 !!$     WRITE(*,'(a,i3)') "Lambda', i =", i1
 !!$     DO ig1=1,ng1
 !!$        WRITE(*,'(10(1pe12.3))') fun1(:,1,ig1,i1)
 !!$     END DO
 !!$     WRITE(*,'(/a)') 'local matrix'
      DO it1=0,nidbas1
         irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1
         DO iw1=0,nidbas1
            jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1
            CALL updtmat(mat, irow, jcol, mata(it1,iw1))
         END DO
 !!$        WRITE(*,'(10(1pe12.3))') mata(it1,:)
      END DO
 !
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun1)
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(mata)
   DEALLOCATE(matc)
   DEALLOCATE(matf)
diff --git a/src/conrhs.tpl b/src/conrhs.tpl
index 5278381..66b05e5 100644
--- a/src/conrhs.tpl
+++ b/src/conrhs.tpl
@@ -1,52 +1,52 @@
 !>
 !> @file conrhs.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
     DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     INTEGER :: ndim, n, nidbas, ng
     INTEGER :: i, ig, it, irow
     LOGICAL :: nlper
 !
     CALL get_dim(spl, ndim, n, nidbas)
     nlper = spl%period
     xg => spl%gausx  ! xg(ng,n)
     wg => spl%gausw  ! wg(ng,n)
     ng = SIZE(xg,1)
     ALLOCATE(fun(0:nidbas,1))
 !
     farr = 0.0d0
     DO i=1,n
        DO ig=1,ng
           CALL basfun(xg(ig,i), spl, fun, i)
           contrib = wg(ig,i)*frhs(xg(ig,i))
           DO it=0,nidbas
              irow = i+it
              IF(nlper) irow = MODULO(irow-1,n) +1
              farr(irow) = farr(irow)+contrib*fun(it,1)
           END DO
        END DO
     END DO
 !
     DEALLOCATE(fun)
diff --git a/src/csr_mod.f90 b/src/csr_mod.f90
index ca67f83..23d0a36 100644
--- a/src/csr_mod.f90
+++ b/src/csr_mod.f90
@@ -1,1255 +1,1255 @@
 !>
 !> @file csr_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE csr
 !
 !   CSR: Implement CSR (Compressed Sparse Row) matrice
 !
 !    T.M. Tran, CRPP-EPFL
 !    October 2012
 !
   USE sparse
   USE mumps_bsplines
   IMPLICIT NONE
 !
   TYPE, EXTENDS(spmat) :: csr_mat
      INTEGER                   :: mrows, ncols
      INTEGER                   :: nnz = 0            ! Number of non-zeros
      INTEGER                   :: nterms             ! Number of terms in weak form
      LOGICAL                   :: nlforce_zero       ! Keep exixting nodes with zero value if .true.
      INTEGER, POINTER          :: irow(:) => NULL()  ! points to start of row
      INTEGER, POINTER          :: idiag(:) => NULL() ! points to diagonal element
      INTEGER, POINTER          :: cols(:) => NULL()  ! Column indices
      DOUBLE PRECISION, POINTER :: val(:) => NULL()   ! Elelement values
      TYPE(mumps_mat), ALLOCATABLE :: mumps
   END TYPE csr_mat
 !
   TYPE, EXTENDS(zspmat) :: zcsr_mat
      INTEGER                   :: mrows, ncols
      INTEGER                   :: nnz = 0            ! Number of non-zeros
      INTEGER                   :: nterms             ! Number of terms in weak form
      LOGICAL                   :: nlforce_zero       ! Keep exixting nodes with zero value if .true.
      INTEGER, POINTER          :: irow(:) => NULL()  ! points to start of row
      INTEGER, POINTER          :: idiag(:) => NULL() ! points to diagonal element
      INTEGER, POINTER          :: cols(:) => NULL()  ! Column indices
      DOUBLE COMPLEX, POINTER :: val(:) => NULL()   ! Elelement values
 !
      TYPE(zmumps_mat), ALLOCATABLE :: mumps
   END TYPE zcsr_mat
 !
   INTERFACE init
      MODULE PROCEDURE init_csr_mat, init_zcsr_mat
   END INTERFACE init
   INTERFACE clear_mat
      MODULE PROCEDURE clear_csr_mat, clear_zcsr_mat
   END INTERFACE clear_mat
   INTERFACE updtmat
      MODULE PROCEDURE updt_csr_mat, updt_zcsr_mat
   END INTERFACE updtmat
   INTERFACE putele
      MODULE PROCEDURE putele_csr_mat, putele_zcsr_mat
   END INTERFACE putele
   INTERFACE getele
      MODULE PROCEDURE getele_csr_mat, getele_zcsr_mat
   END INTERFACE getele
   INTERFACE putrow
      MODULE PROCEDURE putrow_csr_mat, putrow_zcsr_mat
   END INTERFACE putrow
   INTERFACE getrow
      MODULE PROCEDURE getrow_csr_mat, getrow_zcsr_mat
   END INTERFACE getrow
   INTERFACE getdiag
      MODULE PROCEDURE getdiag_csr_mat, getdiag_zcsr_mat
   END INTERFACE getdiag
   INTERFACE putcol
      MODULE PROCEDURE putcol_csr_mat, putcol_zcsr_mat
   END INTERFACE putcol
   INTERFACE getcol
      MODULE PROCEDURE getcol_csr_mat, getcol_zcsr_mat
   END INTERFACE getcol
   INTERFACE to_mat
      MODULE PROCEDURE to_csr_mat, to_zcsr_mat
   END INTERFACE to_mat
   INTERFACE vmx
      MODULE PROCEDURE vmx_csr_mat,  vmx_csr_matn, vmx_zcsr_mat,  vmx_zcsr_matn
   END INTERFACE vmx
   INTERFACE destroy
      MODULE PROCEDURE destroy_csr_mat, destroy_zcsr_mat
   END INTERFACE destroy
   INTERFACE putmat
      MODULE PROCEDURE put_csr_mat, put_zcsr_mat
   END INTERFACE putmat
 !>>>>>
 !>>>>> CONMAT
 !>>>>
   INTERFACE conmat
      MODULE PROCEDURE conmat_1d_csr, conmat_2d_csr, conmat_1d_zcsr, conmat_2d_zcsr
   END INTERFACE conmat
 !>>>>
 !>>>> MULTIGRID_MOD
 !>>>>
   INTERFACE femat
      MODULE PROCEDURE femat_csr
   END INTERFACE femat
   INTERFACE matnorm
      MODULE PROCEDURE matnorm_csr
   END INTERFACE matnorm
   INTERFACE kron
      MODULE PROCEDURE kron_csr
   END INTERFACE kron
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_csr_mat(n, nterms, mat, nlforce_zero, ncols)
 !
 !   Initialize an empty CSR matrix
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(csr_mat)                 :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, intent(in) :: ncols
 !
     CALL init(n, mat%spmat)
     mat%mrows = n
     mat%ncols = n
     IF(PRESENT(ncols)) mat%ncols = ncols
     mat%nterms = nterms
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
   END SUBROUTINE init_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_zcsr_mat(n, nterms, mat, nlforce_zero, ncols)
 !
 !   Initialize an empty CSR matrix
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(zcsr_mat)                :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, intent(in) :: ncols
 !
     CALL init(n, mat%zspmat)
     mat%mrows = n
     mat%ncols = n
     IF(PRESENT(ncols)) mat%ncols = ncols
     mat%nterms = nterms
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
   END SUBROUTINE init_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_zcsr_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(zcsr_mat) :: mat
 !
     mat%val = (0.0d0,0.0d0)
   END SUBROUTINE clear_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_csr_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(csr_mat) :: mat
 !
     mat%val = 0.0d0
   END SUBROUTINE clear_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_csr_mat(mat, i, j, val)
 !
 !   Update element Aij of csr  matrix
 !
     TYPE(csr_mat)                :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
        CALL updtmat(mat%spmat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE csr_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zcsr_mat(mat, i, j, val)
 !
 !   Update element Aij of csr  matrix
 !
     TYPE(zcsr_mat)               :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE COMPLEX, INTENT(in)   :: val
     INTEGER :: k, s, e
 !
     IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
        CALL updtmat(mat%zspmat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE csr_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_csr_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(csr_mat)                 :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
 !
     IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
        CALL putele(mat%spmat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1)-1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  val
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              PRINT*, 'val', val
              PRINT*, 'matrix m, n', mat%mrows, mat%ncols
              STOP '*** Abnormal EXIT in MODULE csr_mod ***'
           END IF
        END IF
    END IF
   END SUBROUTINE putele_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zcsr_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zcsr_mat)                :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE COMPLEX, INTENT(in)  :: val
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
 !
     IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
        CALL putele(mat%zspmat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1)-1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  val
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              PRINT*, 'val', val
              PRINT*, 'matrix m, n', mat%mrows, mat%ncols
              STOP '*** Abnormal EXIT in MODULE csr_mod ***'
           END IF
        END IF
    END IF
  END SUBROUTINE putele_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_csr_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(csr_mat)                 :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
        CALL getele(mat%spmat, iget, jget, val)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1)-1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           val =mat%val(s+k) 
        ELSE
           val = 0.0d0   ! Assume zero val if not found
        END IF
     END IF
   END SUBROUTINE getele_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zcsr_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(zcsr_mat)                :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE COMPLEX, INTENT(out)   :: val
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
        CALL getele(mat%zspmat, iget, jget, val)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1)-1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           val =mat%val(s+k) 
        ELSE
           val = 0.0d0   ! Assume zero val if not found
        END IF
     END IF
   END SUBROUTINE getele_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_csr_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(csr_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)          :: i
     DOUBLE PRECISION, INTENT(in) :: arr(:)
     INTEGER :: s, e, j
 !
     IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
        DO j=1,amat%ncols
           CALL putele(amat, i, j, arr(j))
        END DO
     ELSE
        s = amat%irow(i)
        e = amat%irow(i+1)-1
        DO j=s,e
           amat%val(j) = arr(amat%cols(j))
        END DO
     END IF
   END SUBROUTINE putrow_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zcsr_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(zcsr_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)           :: i
     DOUBLE COMPLEX, INTENT(in)    :: arr(:)
     INTEGER :: s, e, j
 !
     IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
        DO j=1,amat%ncols
           CALL putele(amat, i, j, arr(j))
        END DO
     ELSE
        s = amat%irow(i)
        e = amat%irow(i+1)-1
        DO j=s,e
           amat%val(j) = arr(amat%cols(j))
        END DO
     END IF
   END SUBROUTINE putrow_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_csr_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(csr_mat), INTENT(in)     :: amat
     INTEGER, INTENT(in)           :: i
     DOUBLE PRECISION, INTENT(out) :: arr(:)
     INTEGER :: s, e, j
 !
     arr = 0.0d0
     IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
        DO j=1,amat%ncols
           CALL getele(amat, i, j, arr(j))
        END DO
     ELSE
        s = amat%irow(i)
        e = amat%irow(i+1)-1
        DO j=s,e
           arr(amat%cols(j)) = amat%val(j)
        END DO
     END IF
   END SUBROUTINE getrow_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zcsr_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(zcsr_mat), INTENT(in)    :: amat
     INTEGER, INTENT(in)           :: i
     DOUBLE COMPLEX, INTENT(out)   :: arr(:)
     INTEGER :: s, e, j
 !
     arr = 0.0d0
     IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
        DO j=1,amat%ncols
           CALL getele(amat, i, j, arr(j))
        END DO
     ELSE
        s = amat%irow(i)
        e = amat%irow(i+1)-1
        DO j=s,e
           arr(amat%cols(j)) = amat%val(j)
        END DO
     END IF
   END SUBROUTINE getrow_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getdiag_csr_mat(amat, arr)
 !
 !   Get the diagonal from matrix
 !
     TYPE(csr_mat), INTENT(in)     :: amat
     DOUBLE PRECISION, INTENT(out) :: arr(:)
 !
 !  WARNING: assume that CSR matrix has been converted from linked lists
 !
     arr(:) = amat%val(amat%idiag(:))
   END SUBROUTINE getdiag_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getdiag_zcsr_mat(amat, arr)
 !
 !   Get the diagonal from matrix
 !
     TYPE(zcsr_mat), INTENT(in)    :: amat
     DOUBLE COMPLEX, INTENT(out)   :: arr(:)
 !
 !  WARNING: assume that CSR matrix has been converted from linked lists
 !
     arr(:) = amat%val(amat%idiag(:))
   END SUBROUTINE getdiag_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_csr_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(csr_mat), INTENT(inout)     :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%mrows
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_zcsr_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(zcsr_mat), INTENT(inout)    :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE COMPLEX, INTENT(in)       :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%mrows
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_csr_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(csr_mat), INTENT(in)     :: amat
     INTEGER, INTENT(in)           :: j
     DOUBLE PRECISION, INTENT(out) :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%mrows
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_zcsr_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(zcsr_mat), INTENT(in)    :: amat
     INTEGER, INTENT(in)           :: j
     DOUBLE COMPLEX, INTENT(out)   :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%mrows
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_csr_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to csr matrice structure
 !
     TYPE(csr_mat)             :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: nnz_arr(mat%rank)
     INTEGER :: i, nnz, rank, s, e
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
 !    Allocate the csr matrix structure
 !
     nnz = get_count(mat%spmat, nnz_arr)
     rank = mat%rank
     mat%nnz = nnz
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%irow(rank+1))
     ALLOCATE(mat%idiag(rank))
     ALLOCATE(mat%cols(nnz))
     ALLOCATE(mat%val(nnz))
 !
 !    Fill csr structure and optionally deallocate the sparse rows
 !
     mat%irow = 1
     DO i=1,rank
        mat%irow(i+1) = mat%irow(i) + nnz_arr(i)
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%spmat%row(i), mat%val(s:e), mat%cols(s:e))
        mat%idiag(i) = isearch(mat%cols(s:e), i) + s
        IF(nlclean) CALL destroy(mat%spmat%row(i))
     END DO
 !!$!
 !!$!    MUMPS mat for direct solver
 !!$!
 !!$    ALLOCATE(mat%mumps)
 !!$    CALL csr2mumps(mat, mat%mumps)
   END SUBROUTINE to_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_zcsr_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to csr matrice structure
 !
     TYPE(zcsr_mat)                :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: nnz_arr(mat%rank)
     INTEGER :: i, nnz, rank, s, e
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
 !    Allocate the csr matrix structure
 !
     nnz = get_count(mat%zspmat, nnz_arr)
     rank = mat%rank
     mat%nnz = nnz
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%irow(rank+1))
     ALLOCATE(mat%idiag(rank))
     ALLOCATE(mat%cols(nnz))
     ALLOCATE(mat%val(nnz))
 !
 !    Fill csr structure and optionally deallocate the sparse rows
 !
     mat%irow = 1
     DO i=1,rank
        mat%irow(i+1) = mat%irow(i) + nnz_arr(i)
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%zspmat%row(i), mat%val(s:e), mat%cols(s:e))
        mat%idiag(i) = isearch(mat%cols(s:e), i) + s
        IF(nlclean) CALL destroy(mat%zspmat%row(i))
     END DO
 !!$!
 !!$!    MUMPS mat for direct solver
 !!$!
 !!$    ALLOCATE(mat%mumps)
 !!$    CALL csr2mumps(mat, mat%mumps)
   END SUBROUTINE to_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE csr2mumps(mat, mat_mumps)
 !
 !   Fill mumps structure (based on routine to_mumps_mat)
 !
     INCLUDE 'mpif.h'
     TYPE(csr_mat)   :: mat
     TYPE(mumps_mat) :: mat_mumps
 !
     INTEGER :: i, rank, s, e
     INTEGER :: comm, ierr, nnz_loc
 !
     CALL init(mat%rank, mat%nterms, mat_mumps)
 !
     comm = mat_mumps%mumps_par%COMM
     mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
 !
 !    Allocate the Mumps matrix structure
 !      CSR format: (cols, irow, val) or (JCN, irow, A)
 !      COO format: (IRN, JCN, A)  or (IRN, cols, val)
 !
     rank = mat_mumps%rank
     nnz_loc = mat%nnz
     mat_mumps%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat_mumps%nnz_start = mat_mumps%nnz_start + 1
     mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1
     mat_mumps%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     mat_mumps%mumps_par%N = rank
     mat_mumps%mumps_par%NZ_loc = nnz_loc
 !
     mat_mumps%cols => mat%cols
     mat_mumps%irow => mat%irow
     mat_mumps%val => mat%val
 !
 !     (A,JCN) picked from CSR mat
     mat_mumps%mumps_par%A_loc => mat_mumps%val
     mat_mumps%mumps_par%JCN_loc => mat_mumps%cols
 !
 !    Determine IRN array
     IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc)
     ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc))
     DO i=mat_mumps%istart,mat_mumps%iend
        s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1
        e = mat_mumps%irow(i+1) - mat_mumps%nnz_start
        mat_mumps%mumps_par%IRN_loc(s:e) = i
     END DO
     CALL destroy(mat_mumps%mat)
     NULLIFY(mat_mumps%mat)
 !    
   END SUBROUTINE csr2mumps
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE zcsr2mumps(mat, mat_mumps)
 !
 !   Fill mumps structure (based on routine to_mumps_mat)
 !
     INCLUDE 'mpif.h'
     TYPE(zcsr_mat)   :: mat
     TYPE(zmumps_mat) :: mat_mumps
 !
     INTEGER :: i, rank, s, e
     INTEGER :: comm, ierr, nnz_loc
 !
     CALL init(mat%rank, mat%nterms, mat_mumps)
 !
     comm = mat_mumps%mumps_par%COMM
     mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
 !
 !    Allocate the Mumps matrix structure
 !      CSR format: (cols, irow, val) or (JCN, irow, A)
 !      COO format: (IRN, JCN, A)  or (IRN, cols, val)
 !
     rank = mat_mumps%rank
     nnz_loc = mat%nnz
     mat_mumps%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat_mumps%nnz_start = mat_mumps%nnz_start + 1
     mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1
     mat_mumps%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     mat_mumps%mumps_par%N = rank
     mat_mumps%mumps_par%NZ_loc = nnz_loc
 !
     mat_mumps%cols => mat%cols
     mat_mumps%irow => mat%irow
     mat_mumps%val => mat%val
 !
 !     (A,JCN) picked from CSR mat
     mat_mumps%mumps_par%A_loc => mat_mumps%val
     mat_mumps%mumps_par%JCN_loc => mat_mumps%cols
 !
 !    Determine IRN array
     IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc)
     ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc))
     DO i=mat_mumps%istart,mat_mumps%iend
        s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1
        e = mat_mumps%irow(i+1) - mat_mumps%nnz_start
        mat_mumps%mumps_par%IRN_loc(s:e) = i
     END DO
     CALL destroy(mat_mumps%mat)
     NULLIFY(mat_mumps%mat)
 !    
   END SUBROUTINE zcsr2mumps
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_csr_mat(mat)
 !
 !   Deallocate csr mat
 !
     TYPE(csr_mat) :: mat
 !
     CALL destroy(mat%spmat)
     IF(mat%nnz.GT.0) THEN
        DEALLOCATE(mat%irow)
        DEALLOCATE(mat%idiag)
        DEALLOCATE(mat%cols)
        DEALLOCATE(mat%val)
     END IF
     mat%nnz = 0
   END SUBROUTINE destroy_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zcsr_mat(mat)
 !
 !   Deallocate csr mat
 !
     TYPE(zcsr_mat) :: mat
 !
     CALL destroy(mat%zspmat)
     IF(mat%nnz.GT.0) THEN
        DEALLOCATE(mat%irow)
        DEALLOCATE(mat%idiag)
        DEALLOCATE(mat%cols)
        DEALLOCATE(mat%val)
     END IF
     mat%nnz = 0
   END SUBROUTINE destroy_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_csr_mat(mat, xarr, transa_in) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(csr_mat)                          :: mat
     DOUBLE PRECISION, INTENT(in)           :: xarr(:)
     CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
     DOUBLE PRECISION                       :: yarr(SIZE(xarr))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=1) :: transa
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
     transa = 'N'
     IF(PRESENT(transa_in)) transa = transa_in
 !
 #ifdef MKL
     matdescra = 'g'
     CALL mkl_dcsrmv(transa, n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,mat%rank
        DO j=mat%irow(i), mat%irow(i+1)-1
           IF(transa .EQ. 'N') THEN
              yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
           ELSE
              yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i)
           END IF
        END DO
     END DO
 #endif
 !
   END FUNCTION vmx_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zcsr_mat(mat, xarr, transa_in) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zcsr_mat)                         :: mat
     DOUBLE COMPLEX, INTENT(in)             :: xarr(:)
     CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
     DOUBLE COMPLEX                         :: yarr(SIZE(xarr))
 !
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     CHARACTER(len=1) :: transa
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
     transa = 'N'
     IF(PRESENT(transa_in)) transa = transa_in
 !
 #ifdef MKL
     matdescra = 'g'
     CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,mat%rank
        DO j=mat%irow(i), mat%irow(i+1)-1
           IF(transa .EQ. 'N') THEN
              yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
           ELSE
              yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i)
           END IF
        END DO
     END DO
 #endif
 !
   END FUNCTION vmx_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_csr_matn(mat, xarr, transa_in) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(csr_mat)                          :: mat
     DOUBLE PRECISION, INTENT(in)           :: xarr(:,:)
     CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
     DOUBLE PRECISION                       :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=1) :: transa
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
     transa = 'N'
     IF(PRESENT(transa_in)) transa = transa_in
 !
 #ifdef MKL
     matdescra = 'g'
     CALL mkl_dcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           IF(transa .EQ. 'N') THEN
              yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
           ELSE
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:)
           END IF
        END DO
     END DO
 #endif
 !
   END FUNCTION vmx_csr_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zcsr_matn(mat, xarr, transa_in) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zcsr_mat)                         :: mat
     DOUBLE COMPLEX, INTENT(in)             :: xarr(:,:)
     CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
     DOUBLE COMPLEX                          :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE COMPLEX   :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     CHARACTER(len=1) :: transa
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
     transa = 'N'
     IF(PRESENT(transa_in)) transa = transa_in
 !
 #ifdef MKL
     matdescra = 'g'
     CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           IF(transa .EQ. 'N') THEN
              yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
           ELSE
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:)
           END IF
        END DO
     END DO
 #endif
 !
   END FUNCTION vmx_zcsr_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE conmat_1d_csr(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     USE bsplines
     TYPE(csr_mat)              :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE '../../bsplines/src/conmat_1d.tpl'
   END SUBROUTINE conmat_1d_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE conmat_1d_zcsr(spl, mat, coefeq, maxder)
 !
 !   Construction of FE matrix mat for 1D differential operator
 !   using spline spl
 !
     USE bsplines
     TYPE(zcsr_mat)              :: mat
     TYPE(spline1d), INTENT(in) :: spl
 !
     INCLUDE '../../bsplines/src/zconmat_1d.tpl'
   END SUBROUTINE conmat_1d_zcsr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE conmat_2d_csr(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     USE bsplines
     TYPE(spline2d), INTENT(in) :: spl
     TYPE(csr_mat)              :: mat
 !
     INCLUDE '../../bsplines/src/conmat.tpl'
   END SUBROUTINE conmat_2d_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE conmat_2d_zcsr(spl, mat, coefeq, maxder, nat_order)
 !
 !   Construction of FE matrix mat for 2D differential operator
 !   using spline spl
 !
     USE bsplines
     TYPE(spline2d), INTENT(in) :: spl
     TYPE(zcsr_mat)             :: mat
 !
     INCLUDE '../../bsplines/src/zconmat.tpl'
   END SUBROUTINE conmat_2d_zcsr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE femat_csr(spl, mat, coefeq, nterms)
 !
 !   Compute fe matrix
 !
     USE bsplines
     TYPE(spline1d), INTENT(in)   :: spl
     TYPE(csr_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in)          :: nterms
     INTERFACE
        SUBROUTINE coefeq(x, idt, idw, c)
          DOUBLE PRECISION, INTENT(in) :: x
          INTEGER, INTENT(out) :: idt(:), idw(:)
          DOUBLE PRECISION, INTENT(out) :: c(:)
        END SUBROUTINE coefeq
     END INTERFACE
 !
     INTEGER :: nrank, nx, nidbas
 !
     CALL get_dim(spl, nrank, nx, nidbas)
     IF(spl%period) nrank = nx
     IF(mat%nnz.EQ.0) THEN
        WRITE(*,'(a,i0,a)') 'FEMAT: Initialize mat with ', &
             &    nterms, ' terms ...'
        CALL init(nrank, nterms, mat)
     END IF
     CALL conmat(spl, mat, coefeq)
   END SUBROUTINE femat_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_csr_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(csr_mat)                          :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
   END SUBROUTINE put_csr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_zcsr_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zcsr_mat)                         :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
   END SUBROUTINE put_zcsr_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   DOUBLE PRECISION FUNCTION matnorm_csr(mat, p)
 !
 !   Compute matrix norm
 !
     TYPE(csr_mat), INTENT(in)              :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
 !
     CHARACTER(len=4) :: norm_type
     INTEGER          :: i, j
     DOUBLE PRECISION :: temp(mat%rank)
 !
     norm_type = 'fro'
     IF(PRESENT(p)) norm_type = p
 !
     SELECT CASE (norm_type)
     CASE ('inf')
        DO i=1,mat%rank
           temp(i) = SUM(ABS(mat%val(mat%irow(i):mat%irow(i+1)-1)))
        END DO
        matnorm_csr = MAXVAL(temp)
     CASE ('1')
        temp = 0.0d0
        DO i=1,mat%rank
           DO j=mat%irow(i), mat%irow(i+1)-1
              temp(mat%cols(j)) = temp(mat%cols(j)) + ABS(mat%val(j))
           END DO
        END DO
        matnorm_csr = MAXVAL(temp)
     CASE('fro')
        matnorm_csr = SQRT(DOT_PRODUCT(mat%val, mat%val))
     END SELECT
   END FUNCTION matnorm_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE full_to_csr(fullmat, mat)
 !
 !   Convert full rectangular matrix to csr mat
 !
     DOUBLE PRECISION, INTENT(inout) :: fullmat(:,:)
     TYPE(csr_mat), INTENT(out)      :: mat
 !
     INTEGER :: m, n, nnz
     INTEGER :: i, j, k
 !
     m = SIZE(fullmat,1)
     n = SIZE(fullmat,2)
     CALL init(m, 0, mat, ncols=n)
 !
 !   Determine nnz of matrix
 !
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
     ALLOCATE(mat%irow(m+1))
     ALLOCATE(mat%idiag(m))
 !
 ! Clear matrix small elements of fullmat
     WHERE( ABS(fullmat) < 1.d-8) fullmat=0.0d0
 !
     mat%irow(1) = 1
     nnz = 0
     DO i=1,m
        DO j=1,n
           IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
              nnz = nnz+1
              IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix
                 mat%idiag(i) = nnz
              END IF
           END IF
        END DO
        mat%irow(i+1) = nnz+1
     END DO
 !
 !    Allocate and fill the csr matrix structure
 !
     mat%nnz = nnz
     ALLOCATE(mat%cols(nnz))
     ALLOCATE(mat%val(nnz))
     k=0
     DO i=1,m
        DO j=1,n
           IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
              k=k+1
              mat%cols(k) = j
              mat%val(k) = fullmat(i,j)
           END IF
        END DO
     END DO
   END SUBROUTINE full_to_csr
 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE full_to_zcsr(fullmat, mat)
 !
 !   Convert full rectangular matrix to csr mat
 !
     DOUBLE COMPLEX, INTENT(inout) :: fullmat(:,:)
     TYPE(zcsr_mat), INTENT(out)   :: mat
 !
     INTEGER :: m, n, nnz
     INTEGER :: i, j, k
 !
     m = SIZE(fullmat,1)
     n = SIZE(fullmat,2)
     CALL init(m, 0, mat, ncols=n)
 !
 !   Determine nnz of matrix
 !
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
     ALLOCATE(mat%irow(m+1))
     ALLOCATE(mat%idiag(m))
 !
 ! Clear matrix small elements of fullmat
     WHERE( ABS(fullmat) < 1.d-8) fullmat=(0.0d0,0.0d0)
 !
     mat%irow(1) = 1
     nnz = 0
     DO i=1,m
        DO j=1,n
           IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
              nnz = nnz+1
              IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix
                 mat%idiag(i) = nnz
              END IF
           END IF
        END DO
        mat%irow(i+1) = nnz+1
     END DO
 !
 !    Allocate and fill the csr matrix structure
 !
     mat%nnz = nnz
     ALLOCATE(mat%cols(nnz))
     ALLOCATE(mat%val(nnz))
     k=0
     DO i=1,m
        DO j=1,n
           IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
              k=k+1
              mat%cols(k) = j
              mat%val(k) = fullmat(i,j)
           END IF
        END DO
     END DO
   END SUBROUTINE full_to_zcsr
 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE check_dom(mat)
 !
 !  Check whether mat is strict diagonal dominabce.
 ! 
     TYPE(csr_mat), INTENT(in) :: mat
     DOUBLE PRECISION :: arow(mat%rank), asum(mat%rank)
     INTEGER :: n, i, j1, j2, jdiag
 !
     n = mat%rank
     DO i=1,n
        j1 = mat%irow(i)
        jdiag = mat%idiag(i)
        j2 = mat%irow(i+1)-1
        asum(i) = SUM(ABS(mat%val(j1:j2))) / ABS(mat%val(jdiag)) - 1.0d0
     END DO
     WRITE(*,'(/a,1pe12.3)') 'Max of sum of off-diag', MAXVAL(asum)
 !
   END SUBROUTINE check_dom
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE kron_csr(mata, matb, matc)
 !
 !   Kronecker product of 2 CSR matrices
 !
     USE sparse, ONLY : isearch
     TYPE(csr_mat), INTENT(in)  :: mata, matb
     TYPE(csr_mat), INTENT(out) :: matc
 !
     INTEGER :: m1, n1, nnz1, m2, n2, nnz2, m, n, nnz
     INTEGER :: i,i1,i2,j1,s,s1,s2,e,e1,e2,k,nc2
 !
     m1 = mata%mrows
     n1 = mata%ncols
     nnz1 = mata%nnz
     m2 = matb%mrows
     n2 = matb%ncols
     nnz2 = matb%nnz
     m = m1*m2
     n = n1*n2
     nnz = nnz1*nnz2
 !
     CALL init(m, 0, matc, ncols=n)
     matc%nnz = nnz
     IF(ASSOCIATED(matc%irow)) DEALLOCATE(matc%irow)
     IF(ASSOCIATED(matc%idiag)) DEALLOCATE(matc%idiag)
     IF(ASSOCIATED(matc%cols)) DEALLOCATE(matc%cols)
     IF(ASSOCIATED(matc%val)) DEALLOCATE(matc%val)
     ALLOCATE(matc%irow(m+1))
     IF(m.EQ.n) THEN
        ALLOCATE(matc%idiag(m))     ! Only for square matrices
     END IF
     ALLOCATE(matc%cols(nnz))
     ALLOCATE(matc%val(nnz))
 !
     k = 0
     matc%irow(1) = 1
     DO i1=1,m1
        s1=mata%irow(i1)
        e1=mata%irow(i1+1)-1
        DO i2=1,m2
           s2=matb%irow(i2)
           e2=matb%irow(i2+1)-1
           nc2=e2-s2+1
           DO j1=s1,e1
              matc%val(k+1:k+nc2) = mata%val(j1)*matb%val(s2:e2)
              matc%cols(k+1:k+nc2) = (mata%cols(j1)-1)*n2 + matb%cols(s2:e2)
              k = k+nc2
              matc%irow((i1-1)*m2+i2+1) = k+1  ! Points to next row
           END DO
        END DO
     END DO
 !
 !  Search the diagonals
     DO i=1,matc%mrows
        s = matc%irow(i)
        e = matc%irow(i+1)-1
        matc%idiag(i) = isearch(matc%cols(s:e),i) + s
     END DO
   END SUBROUTINE kron_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE csr
diff --git a/src/lapack_extra.f b/src/lapack_extra.f
index 8f5248f..d96b286 100644
--- a/src/lapack_extra.f
+++ b/src/lapack_extra.f
@@ -1,718 +1,718 @@
 !>
 !> @file lapack_extra.f
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
       DOUBLE PRECISION FUNCTION DOPGB( SUBNAM, M, N, KL, KU, IPIV )
 *
 *  -- LAPACK timing routine (version 3.0) --
 *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
 *     Courant Institute, Argonne National Lab, and Rice University
 *     June 30, 1999
 *
 *     .. Scalar Arguments ..
       CHARACTER*6        SUBNAM
       INTEGER            KL, KU, M, N
 *     ..
 *     .. Array Arguments ..
       INTEGER            IPIV( * )
 *     ..
 *
 *  Purpose
 *  =======
 *
 *  DOPGB counts operations for the LU factorization of a band matrix
 *  xGBTRF.
 *
 *  Arguments
 *  =========
 *
 *  SUBNAM  (input) CHARACTER*6
 *          The name of the subroutine.
 *
 *  M       (input) INTEGER
 *          The number of rows of the coefficient matrix.  M >= 0.
 *
 *  N       (input) INTEGER
 *          The number of columns of the coefficient matrix.  N >= 0.
 *
 *  KL      (input) INTEGER
 *          The number of subdiagonals of the matrix.  KL >= 0.
 *
 *  KU      (input) INTEGER
 *          The number of superdiagonals of the matrix.  KU >= 0.
 *
 *  IPIV    (input)  INTEGER array, dimension (min(M,N))
 *          The vector of pivot indices from DGBTRF or ZGBTRF.
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
       LOGICAL            CORZ, SORD
       CHARACTER          C1
       CHARACTER*2        C2
       CHARACTER*3        C3
       INTEGER            I, J, JP, JU, KM
       DOUBLE PRECISION   ADDFAC, ADDS, MULFAC, MULTS
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, LSAMEN
       EXTERNAL           LSAME, LSAMEN
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
       DOPGB = 0
       MULTS = 0
       ADDS = 0
       C1 = SUBNAM( 1: 1 )
       C2 = SUBNAM( 2: 3 )
       C3 = SUBNAM( 4: 6 )
       SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' )
       CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' )
       IF( .NOT.( SORD .OR. CORZ ) )
      $   RETURN
       IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN
          ADDFAC = 1
          MULFAC = 1
       ELSE
          ADDFAC = 2
          MULFAC = 6
       END IF
 *
 *     --------------------------
 *     GB:  General Band matrices
 *     --------------------------
 *
       IF( LSAMEN( 2, C2, 'GB' ) ) THEN
 *
 *        xGBTRF:  M, N, KL, KU  =>  M, N, KL, KU
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             JU = 1
             DO 10 J = 1, MIN( M, N )
                KM = MIN( KL, M-J )
                JP = IPIV( J )
                JU = MAX( JU, MIN( JP+KU, N ) )
                IF( KM.GT.0 ) THEN
                   MULTS = MULTS + KM*( 1+JU-J )
                   ADDS = ADDS + KM*( JU-J )
                END IF
    10       CONTINUE
          END IF
 *
 *     ---------------------------------
 *     GT:  General Tridiagonal matrices
 *     ---------------------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'GT' ) ) THEN
 *
 *        xGTTRF:  N  =>  M
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             MULTS = 2*( M-1 )
             ADDS = M - 1
             DO 20 I = 1, M - 2
                IF( IPIV( I ).NE.I )
      $            MULTS = MULTS + 1
    20       CONTINUE
 *
 *        xGTTRS:  N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = 4*N*( M-1 )
             ADDS = 3*N*( M-1 )
 *
 *        xGTSV:   N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN
             MULTS = ( 4*N+2 )*( M-1 )
             ADDS = ( 3*N+1 )*( M-1 )
             DO 30 I = 1, M - 2
                IF( IPIV( I ).NE.I )
      $            MULTS = MULTS + 1
    30       CONTINUE
          END IF
       END IF
 *
       DOPGB = MULFAC*MULTS + ADDFAC*ADDS
       RETURN
 *
 *     End of DOPGB
 *
       END
       DOUBLE PRECISION FUNCTION DOPLA( SUBNAM, M, N, KL, KU, NB )
 *
 *  -- LAPACK timing routine (version 3.0) --
 *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
 *     Courant Institute, Argonne National Lab, and Rice University
 *     June 30, 1999
 *
 *     .. Scalar Arguments ..
       CHARACTER*6        SUBNAM
       INTEGER            KL, KU, M, N, NB
 *     ..
 *
 *  Purpose
 *  =======
 *
 *  DOPLA computes an approximation of the number of floating point
 *  operations used by the subroutine SUBNAM with the given values
 *  of the parameters M, N, KL, KU, and NB.
 *
 *  This version counts operations for the LAPACK subroutines.
 *
 *  Arguments
 *  =========
 *
 *  SUBNAM  (input) CHARACTER*6
 *          The name of the subroutine.
 *
 *  M       (input) INTEGER
 *          The number of rows of the coefficient matrix.  M >= 0.
 *
 *  N       (input) INTEGER
 *          The number of columns of the coefficient matrix.
 *          For solve routine when the matrix is square,
 *          N is the number of right hand sides.  N >= 0.
 *
 *  KL      (input) INTEGER
 *          The lower band width of the coefficient matrix.
 *          If needed, 0 <= KL <= M-1.
 *          For xGEQRS, KL is the number of right hand sides.
 *
 *  KU      (input) INTEGER
 *          The upper band width of the coefficient matrix.
 *          If needed, 0 <= KU <= N-1.
 *
 *  NB      (input) INTEGER
 *          The block size.  If needed, NB >= 1.
 *
 *  Notes
 *  =====
 *
 *  In the comments below, the association is given between arguments
 *  in the requested subroutine and local arguments.  For example,
 *
 *  xGETRS:  N, NRHS  =>  M, N
 *
 *  means that arguments N and NRHS in DGETRS are passed to arguments
 *  M and N in this procedure.
 *
 *  =====================================================================
 *
 *     .. Local Scalars ..
       LOGICAL            CORZ, SORD
       CHARACTER          C1
       CHARACTER*2        C2
       CHARACTER*3        C3
       INTEGER            I
       DOUBLE PRECISION   ADDFAC, ADDS, EK, EM, EMN, EN, MULFAC, MULTS,
      $                   WL, WU
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, LSAMEN
       EXTERNAL           LSAME, LSAMEN
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     --------------------------------------------------------
 *     Initialize DOPLA to 0 and do a quick return if possible.
 *     --------------------------------------------------------
 *
       DOPLA = 0
       MULTS = 0
       ADDS = 0
       C1 = SUBNAM( 1: 1 )
       C2 = SUBNAM( 2: 3 )
       C3 = SUBNAM( 4: 6 )
       SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' )
       CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' )
       IF( M.LE.0 .OR. .NOT.( SORD .OR. CORZ ) )
      $   RETURN
 *
 *     ---------------------------------------------------------
 *     If the coefficient matrix is real, count each add as 1
 *     operation and each multiply as 1 operation.
 *     If the coefficient matrix is complex, count each add as 2
 *     operations and each multiply as 6 operations.
 *     ---------------------------------------------------------
 *
       IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN
          ADDFAC = 1
          MULFAC = 1
       ELSE
          ADDFAC = 2
          MULFAC = 6
       END IF
       EM = M
       EN = N
       EK = KL
 *
 *     ---------------------------------
 *     GE:  GEneral rectangular matrices
 *     ---------------------------------
 *
       IF( LSAMEN( 2, C2, 'GE' ) ) THEN
 *
 *        xGETRF:  M, N  =>  M, N
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             EMN = MIN( M, N )
             ADDS = EMN*( EM*EN-( EM+EN )*( EMN+1.D0 ) / 2.D0+
      $             ( EMN+1.D0 )*( 2.D0*EMN+1.D0 ) / 6.D0 )
             MULTS = ADDS + EMN*( EM-( EMN+1.D0 ) / 2.D0 )
 *
 *        xGETRS:  N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*EM*EM
             ADDS = EN*( EM*( EM-1.D0 ) )
 *
 *        xGETRI:  N  =>  M
 *
          ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
             MULTS = EM*( 5.D0 / 6.D0+EM*( 1.D0 / 2.D0+EM*( 2.D0 /
      $              3.D0 ) ) )
             ADDS = EM*( 5.D0 / 6.D0+EM*( -3.D0 / 2.D0+EM*( 2.D0 /
      $             3.D0 ) ) )
 *
 *        xGEQRF or xGEQLF:  M, N  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'QRF' ) .OR.
      $            LSAMEN( 3, C3, 'QR2' ) .OR.
      $            LSAMEN( 3, C3, 'QLF' ) .OR. LSAMEN( 3, C3, 'QL2' ) )
      $             THEN
             IF( M.GE.N ) THEN
                MULTS = EN*( ( ( 23.D0 / 6.D0 )+EM+EN / 2.D0 )+EN*
      $                 ( EM-EN / 3.D0 ) )
                ADDS = EN*( ( 5.D0 / 6.D0 )+EN*
      $                ( 1.D0 / 2.D0+( EM-EN / 3.D0 ) ) )
             ELSE
                MULTS = EM*( ( ( 23.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM*
      $                 ( EN-EM / 3.D0 ) )
                ADDS = EM*( ( 5.D0 / 6.D0 )+EN-EM / 2.D0+EM*
      $                ( EN-EM / 3.D0 ) )
             END IF
 *
 *        xGERQF or xGELQF:  M, N  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'RQF' ) .OR.
      $            LSAMEN( 3, C3, 'RQ2' ) .OR.
      $            LSAMEN( 3, C3, 'LQF' ) .OR. LSAMEN( 3, C3, 'LQ2' ) )
      $             THEN
             IF( M.GE.N ) THEN
                MULTS = EN*( ( ( 29.D0 / 6.D0 )+EM+EN / 2.D0 )+EN*
      $                 ( EM-EN / 3.D0 ) )
                ADDS = EN*( ( 5.D0 / 6.D0 )+EM+EN*
      $                ( -1.D0 / 2.D0+( EM-EN / 3.D0 ) ) )
             ELSE
                MULTS = EM*( ( ( 29.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM*
      $                 ( EN-EM / 3.D0 ) )
                ADDS = EM*( ( 5.D0 / 6.D0 )+EM / 2.D0+EM*
      $                ( EN-EM / 3.D0 ) )
             END IF
 *
 *        xGEQPF: M, N => M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'QPF' ) ) THEN
             EMN = MIN( M, N )
             MULTS = 2*EN*EN + EMN*( 3*EM+5*EN+2*EM*EN-( EMN+1 )*
      $              ( 4+EN+EM-( 2*EMN+1 ) / 3 ) )
             ADDS = EN*EN + EMN*( 2*EM+EN+2*EM*EN-( EMN+1 )*
      $             ( 2+EN+EM-( 2*EMN+1 ) / 3 ) )
 *
 *        xGEQRS or xGERQS:  M, N, NRHS  =>  M, N, KL
 *
          ELSE IF( LSAMEN( 3, C3, 'QRS' ) .OR. LSAMEN( 3, C3, 'RQS' ) )
      $             THEN
             MULTS = EK*( EN*( 2.D0-EK )+EM*
      $              ( 2.D0*EN+( EM+1.D0 ) / 2.D0 ) )
             ADDS = EK*( EN*( 1.D0-EK )+EM*
      $             ( 2.D0*EN+( EM-1.D0 ) / 2.D0 ) )
 *
 *        xGELQS or xGEQLS:  M, N, NRHS  =>  M, N, KL
 *
          ELSE IF( LSAMEN( 3, C3, 'LQS' ) .OR. LSAMEN( 3, C3, 'QLS' ) )
      $             THEN
             MULTS = EK*( EM*( 2.D0-EK )+EN*
      $              ( 2.D0*EM+( EN+1.D0 ) / 2.D0 ) )
             ADDS = EK*( EM*( 1.D0-EK )+EN*
      $             ( 2.D0*EM+( EN-1.D0 ) / 2.D0 ) )
 *
 *        xGEBRD:  M, N  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'BRD' ) ) THEN
             IF( M.GE.N ) THEN
                MULTS = EN*( 20.D0 / 3.D0+EN*
      $                 ( 2.D0+( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) ) )
                ADDS = EN*( 5.D0 / 3.D0+( EN-EM )+EN*
      $                ( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) )
             ELSE
                MULTS = EM*( 20.D0 / 3.D0+EM*
      $                 ( 2.D0+( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) ) )
                ADDS = EM*( 5.D0 / 3.D0+( EM-EN )+EM*
      $                ( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) )
             END IF
 *
 *        xGEHRD:  N  =>  M
 *
          ELSE IF( LSAMEN( 3, C3, 'HRD' ) ) THEN
             IF( M.EQ.1 ) THEN
                MULTS = 0.D0
                ADDS = 0.D0
             ELSE
                MULTS = -13.D0 + EM*( -7.D0 / 6.D0+EM*
      $                 ( 0.5D0+EM*( 5.D0 / 3.D0 ) ) )
                ADDS = -8.D0 + EM*( -2.D0 / 3.D0+EM*
      $                ( -1.D0+EM*( 5.D0 / 3.D0 ) ) )
             END IF
 *
          END IF
 *
 *     ----------------------------
 *     GB:  General Banded matrices
 *     ----------------------------
 *        Note:  The operation count is overestimated because
 *        it is assumed that the factor U fills in to the maximum
 *        extent, i.e., that its bandwidth goes from KU to KL + KU.
 *
       ELSE IF( LSAMEN( 2, C2, 'GB' ) ) THEN
 *
 *        xGBTRF:  M, N, KL, KU  =>  M, N, KL, KU
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             DO 10 I = MIN( M, N ), 1, -1
                WL = MAX( 0, MIN( KL, M-I ) )
                WU = MAX( 0, MIN( KL+KU, N-I ) )
                MULTS = MULTS + WL*( 1.D0+WU )
                ADDS = ADDS + WL*WU
    10       CONTINUE
 *
 *        xGBTRS:  N, NRHS, KL, KU  =>  M, N, KL, KU
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             WL = MAX( 0, MIN( KL, M-1 ) )
             WU = MAX( 0, MIN( KL+KU, M-1 ) )
             MULTS = EN*( EM*( WL+1.D0+WU )-0.5D0*
      $              ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) )
             ADDS = EN*( EM*( WL+WU )-0.5D0*
      $             ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) )
 *
          END IF
 *
 *     --------------------------------------
 *     PO:  POsitive definite matrices
 *     PP:  Positive definite Packed matrices
 *     --------------------------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'PO' ) .OR. LSAMEN( 2, C2, 'PP' ) ) THEN
 *
 *        xPOTRF:  N  =>  M
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 /
      $              6.D0 ) ) )
             ADDS = ( 1.D0 / 6.D0 )*EM*( -1.D0+EM*EM )
 *
 *        xPOTRS:  N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*( EM*( EM+1.D0 ) )
             ADDS = EN*( EM*( EM-1.D0 ) )
 *
 *        xPOTRI:  N  =>  M
 *
          ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
             MULTS = EM*( 2.D0 / 3.D0+EM*( 1.D0+EM*( 1.D0 / 3.D0 ) ) )
             ADDS = EM*( 1.D0 / 6.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 /
      $             3.D0 ) ) )
 *
          END IF
 *
 *     ------------------------------------
 *     PB:  Positive definite Band matrices
 *     ------------------------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'PB' ) ) THEN
 *
 *        xPBTRF:  N, K  =>  M, KL
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             MULTS = EK*( -2.D0 / 3.D0+EK*( -1.D0+EK*( -1.D0 / 3.D0 ) ) )
      $               + EM*( 1.D0+EK*( 3.D0 / 2.D0+EK*( 1.D0 / 2.D0 ) ) )
             ADDS = EK*( -1.D0 / 6.D0+EK*( -1.D0 / 2.D0+EK*( -1.D0 /
      $             3.D0 ) ) ) + EM*( EK / 2.D0*( 1.D0+EK ) )
 *
 *        xPBTRS:  N, NRHS, K  =>  M, N, KL
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*( ( 2*EM-EK )*( EK+1.D0 ) )
             ADDS = EN*( EK*( 2*EM-( EK+1.D0 ) ) )
 *
          END IF
 *
 *     ----------------------------------
 *     PT:  Positive definite Tridiagonal
 *     ----------------------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'PT' ) ) THEN
 *
 *        xPTTRF:  N  =>  M
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             MULTS = 2*( EM-1 )
             ADDS = EM - 1
 *
 *        xPTTRS:  N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*( 3*EM-2 )
             ADDS = EN*( 2*( EM-1 ) )
 *
 *        xPTSV:  N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN
             MULTS = 2*( EM-1 ) + EN*( 3*EM-2 )
             ADDS = EM - 1 + EN*( 2*( EM-1 ) )
          END IF
 *
 *     --------------------------------------------------------
 *     SY:  SYmmetric indefinite matrices
 *     SP:  Symmetric indefinite Packed matrices
 *     HE:  HErmitian indefinite matrices (complex only)
 *     HP:  Hermitian indefinite Packed matrices (complex only)
 *     --------------------------------------------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR.
      $         LSAMEN( 3, SUBNAM, 'CHE' ) .OR.
      $         LSAMEN( 3, SUBNAM, 'ZHE' ) .OR.
      $         LSAMEN( 3, SUBNAM, 'CHP' ) .OR.
      $         LSAMEN( 3, SUBNAM, 'ZHP' ) ) THEN
 *
 *        xSYTRF:  N  =>  M
 *
          IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
             MULTS = EM*( 10.D0 / 3.D0+EM*
      $              ( 1.D0 / 2.D0+EM*( 1.D0 / 6.D0 ) ) )
             ADDS = EM / 6.D0*( -1.D0+EM*EM )
 *
 *        xSYTRS:  N, NRHS  =>  M, N
 *
          ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*EM*EM
             ADDS = EN*( EM*( EM-1.D0 ) )
 *
 *        xSYTRI:  N  =>  M
 *
          ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
             MULTS = EM*( 2.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) )
             ADDS = EM*( -1.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) )
 *
 *        xSYTRD, xSYTD2:  N  =>  M
 *
          ELSE IF( LSAMEN( 3, C3, 'TRD' ) .OR. LSAMEN( 3, C3, 'TD2' ) )
      $             THEN
             IF( M.EQ.1 ) THEN
                MULTS = 0.D0
                ADDS = 0.D0
             ELSE
                MULTS = -15.D0 + EM*( -1.D0 / 6.D0+EM*
      $                 ( 5.D0 / 2.D0+EM*( 2.D0 / 3.D0 ) ) )
                ADDS = -4.D0 + EM*( -8.D0 / 3.D0+EM*
      $                ( 1.D0+EM*( 2.D0 / 3.D0 ) ) )
             END IF
          END IF
 *
 *     -------------------
 *     Triangular matrices
 *     -------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'TR' ) .OR. LSAMEN( 2, C2, 'TP' ) ) THEN
 *
 *        xTRTRS:  N, NRHS  =>  M, N
 *
          IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*EM*( EM+1.D0 ) / 2.D0
             ADDS = EN*EM*( EM-1.D0 ) / 2.D0
 *
 *        xTRTRI:  N  =>  M
 *
          ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
             MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 /
      $              6.D0 ) ) )
             ADDS = EM*( 1.D0 / 3.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 /
      $             6.D0 ) ) )
 *
          END IF
 *
       ELSE IF( LSAMEN( 2, C2, 'TB' ) ) THEN
 *
 *        xTBTRS:  N, NRHS, K  =>  M, N, KL
 *
          IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
             MULTS = EN*( EM*( EM+1.D0 ) / 2.D0-( EM-EK-1.D0 )*
      $              ( EM-EK ) / 2.D0 )
             ADDS = EN*( EM*( EM-1.D0 ) / 2.D0-( EM-EK-1.D0 )*( EM-EK ) /
      $             2.D0 )
          END IF
 *
 *     --------------------
 *     Trapezoidal matrices
 *     --------------------
 *
       ELSE IF( LSAMEN( 2, C2, 'TZ' ) ) THEN
 *
 *        xTZRQF:  M, N => M, N
 *
          IF( LSAMEN( 3, C3, 'RQF' ) ) THEN
             EMN = MIN( M, N )
             MULTS = 3*EM*( EN-EM+1 ) + ( 2*EN-2*EM+3 )*
      $              ( EM*EM-EMN*( EMN+1 ) / 2 )
             ADDS = ( EN-EM+1 )*( EM+2*EM*EM-EMN*( EMN+1 ) )
          END IF
 *
 *     -------------------
 *     Orthogonal matrices
 *     -------------------
 *
       ELSE IF( ( SORD .AND. LSAMEN( 2, C2, 'OR' ) ) .OR.
      $         ( CORZ .AND. LSAMEN( 2, C2, 'UN' ) ) ) THEN
 *
 *        -MQR, -MLQ, -MQL, or -MRQ:  M, N, K, SIDE  =>  M, N, KL, KU
 *           where KU<= 0 indicates SIDE = 'L'
 *           and   KU> 0  indicates SIDE = 'R'
 *
          IF( LSAMEN( 3, C3, 'MQR' ) .OR. LSAMEN( 3, C3, 'MLQ' ) .OR.
      $       LSAMEN( 3, C3, 'MQL' ) .OR. LSAMEN( 3, C3, 'MRQ' ) ) THEN
             IF( KU.LE.0 ) THEN
                MULTS = EK*EN*( 2.D0*EM+2.D0-EK )
                ADDS = EK*EN*( 2.D0*EM+1.D0-EK )
             ELSE
                MULTS = EK*( EM*( 2.D0*EN-EK )+
      $                 ( EM+EN+( 1.D0-EK ) / 2.D0 ) )
                ADDS = EK*EM*( 2.D0*EN+1.D0-EK )
             END IF
 *
 *        -GQR or -GQL:  M, N, K  =>  M, N, KL
 *
          ELSE IF( LSAMEN( 3, C3, 'GQR' ) .OR. LSAMEN( 3, C3, 'GQL' ) )
      $             THEN
             MULTS = EK*( -5.D0 / 3.D0+( 2.D0*EN-EK )+
      $              ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
             ADDS = EK*( 1.D0 / 3.D0+( EN-EM )+
      $             ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
 *
 *        -GLQ or -GRQ:  M, N, K  =>  M, N, KL
 *
          ELSE IF( LSAMEN( 3, C3, 'GLQ' ) .OR. LSAMEN( 3, C3, 'GRQ' ) )
      $             THEN
             MULTS = EK*( -2.D0 / 3.D0+( EM+EN-EK )+
      $              ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
             ADDS = EK*( 1.D0 / 3.D0+( EM-EN )+
      $             ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
 *
          END IF
 *
       END IF
 *
       DOPLA = MULFAC*MULTS + ADDFAC*ADDS
 *
       RETURN
 *
 *     End of DOPLA
 *
       END
       LOGICAL          FUNCTION LSAMEN( N, CA, CB )
 *
 *  -- LAPACK auxiliary routine (version 2.0) --
 *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
 *     Courant Institute, Argonne National Lab, and Rice University
 *     September 30, 1994
 *
 *     .. Scalar Arguments ..
       CHARACTER*( * )    CA, CB
       INTEGER            N
 *     ..
 *
 *  Purpose
 *  =======
 *
 *  LSAMEN  tests if the first N letters of CA are the same as the
 *  first N letters of CB, regardless of case.
 *  LSAMEN returns .TRUE. if CA and CB are equivalent except for case
 *  and .FALSE. otherwise.  LSAMEN also returns .FALSE. if LEN( CA )
 *  or LEN( CB ) is less than N.
 *
 *  Arguments
 *  =========
 *
 *  N       (input) INTEGER
 *          The number of characters in CA and CB to be compared.
 *
 *  CA      (input) CHARACTER*(*)
 *  CB      (input) CHARACTER*(*)
 *          CA and CB specify two character strings of length at least N.
 *          Only the first N characters of each string will be accessed.
 *
 * =====================================================================
 *
 *     .. Local Scalars ..
       INTEGER            I
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME
       EXTERNAL           LSAME
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          LEN
 *     ..
 *     .. Executable Statements ..
 *
       LSAMEN = .FALSE.
       IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N )
      $   GO TO 20
 *
 *     Do for each character in the two strings.
 *
       DO 10 I = 1, N
 *
 *        Test if the characters are equal using LSAME.
 *
          IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) )
      $      GO TO 20
 *
    10 CONTINUE
       LSAMEN = .TRUE.
 *
    20 CONTINUE
       RETURN
 *
 *     End of LSAMEN
 *
       END
diff --git a/src/math_util.f90 b/src/math_util.f90
index 60817ff..6f4a4e5 100644
--- a/src/math_util.f90
+++ b/src/math_util.f90
@@ -1,291 +1,291 @@
 !>
 !> @file math_util.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE math_util
 !
 !    MATH_UTIL: Some math utilities.
 !
 !    T.M. Tran, CRPP-EPFL
 !    December 2012
 !
 !
 !   Notes:
 !     - Assume the Fortran 2008 intrinsic BESSEL_JN(n,x) exists!
 !
   IMPLICIT NONE
   DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0)
 !
 CONTAINS
   ELEMENTAL FUNCTION bessjp(n,x)
 !
 !    Derivative of J_n
 !
     DOUBLE PRECISION :: bessjp
     INTEGER, INTENT(in) :: n
     DOUBLE PRECISION, INTENT(in) :: x
 !
     IF(n.EQ.0) THEN
        bessjp = -bessel_jn(1,x)
     ELSE
        bessjp = 0.5d0*(bessel_jn(n-1,x)-bessel_jn(n+1,x))
     END IF
   END FUNCTION bessjp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION root_bessj(n, s, info)
 !
 !   s^th root of j_n
 !
     DOUBLE PRECISION :: root_bessj
     INTEGER, INTENT(in) :: n
     INTEGER, INTENT(in) :: s
     INTEGER, OPTIONAL, INTENT(out) :: info
 !
     DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2,f3
     DOUBLE PRECISION :: c1=1.8557571d0, c2=1.033150d0, c3=.00397d0, c4=.0908d0,&
          &              c5=.043d0
     DOUBLE PRECISION :: zero
     INTEGER :: iter
 !
     fn = REAL(ABS(n),8)
     IF(s.EQ.1) THEN ! first zero
        IF(n.EQ.0) THEN
           zero = c1+c2-c3-c4+c5
        ELSE
           f1 = fn**(1.d0/3.d0)
           f2 = f1*f1*fn
           f3 = f1*fn*fn
           zero = fn+c1*f1+(c2/f1)-(c3/fn)-(c4/f2)+(c5/f3)
        END IF
     ELSE           ! Other zeros
        t0 = 4.d0*fn*fn
        t1 = t0-1.d0
        t3 = 4.d0*t1*(7.d0*t0-31.d0)
        t5 = 32.d0*t1*((83.d0*t0-982.d0)*t0+3779.d0)
        t7 = 64.d0*t1*(((6949.d0*t0-153855.d0)*t0+1585743.d0)*t0  &
             -6277237.d0)
        fk = REAL(s,8)
 !
        b0 = (fk+.5d0*fn-.25d0)*pi!    mac mahon's series for k>>n
        b1 = 8.d0*b0
        b2 = b1*b1
        b3 = 3.d0*b1*b2
        b5 = 5.d0*b3*b2
        b7 = 7.d0*b5*b2
        zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7)
     END IF
     CALL newton(iter)
     IF(PRESENT(info)) info = iter
     root_bessj = zero
   CONTAINS
     SUBROUTINE newton(iter)
       INTEGER, INTENT(out) :: iter
       INTEGER :: itermx = 20
       DOUBLE PRECISION :: dx, tol
       tol = EPSILON(1.0d0)*zero
       iter = 0
       DO
          iter = iter+1
          dx = -bessel_jn(n,zero)/bessjp(n,zero)
          zero = zero+dx
          IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT
       END DO
     END SUBROUTINE newton
   END FUNCTION root_bessj
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION root_bessjp(n, s, info)
 !
 !   s^th root of derivative of j_n
 !
     DOUBLE PRECISION :: root_bessjp
     INTEGER, INTENT(in) :: n
     INTEGER, INTENT(in) :: s
     INTEGER, OPTIONAL, INTENT(out) :: info
 !
     DOUBLE PRECISION :: c1=0.8086165D0, c2=0.072490D0, c3=.05097D0, c4=.0094D0
     DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2
     INTEGER :: iter
     DOUBLE PRECISION :: zero
 !
     IF(n.EQ.0 .AND. s.EQ.1) THEN
        root_bessjp = 0.0d0
        IF(PRESENT(info)) info = 0
        RETURN
     END IF
 !
     fn = REAL(ABS(n),8)
     fk = REAL(s,8)
 !
     IF(s.GT.1) THEN
 !
 !        McMahon's series for s >> n
        b0 = (fk+.5d0*fn-.75d0)*pi
        b1 = 8.d0*b0
        b2 = b1*b1
        b3 = 3.d0*b1*b2
        b5 = 5.d0*b3*b2
        b7 = 7.d0*b5*b2
        t0 = 4.d0*fn*fn
        t1 = t0+3.d0
        t3 = 4.d0*((7.d0*t0+82.d0)*t0-9.d0)
        t5 = 32.d0*(((83.d0*t0+2075.d0)*t0-3039.d0)*t0+3537.d0)
        t7 = 64.d0*((((6949.d0*t0+296492.d0)*t0-1248002.d0)*t0  &
             +7414380.d0)*t0-5853627.d0)
        zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7)
     ELSE              
 !
 !        Tchebychev's series for s <= n
        f1 = fn**(1.d0/3.d0)
        f2 = f1*f1*fn
        zero = fn+c1*f1+(c2/f1)-(c3/fn)+(c4/f2)
     END IF
 !
     CALL newton(iter)
     root_bessjp = zero
     IF(PRESENT(info)) info = iter
   CONTAINS
     SUBROUTINE newton(iter)
       INTEGER, INTENT(out) :: iter
       INTEGER :: itermx = 20
       DOUBLE PRECISION :: dx, tol
       tol = EPSILON(1.0d0)*zero
       iter = 0
       DO
          iter = iter+1
          dx = -bessel_jn(n,zero)/bessjp(n,zero)
          dx = -2.0d0 * (bessel_jn(n-1,zero)-bessel_jn(n+1,zero)) / &
               &  (bessel_jn(n-2,zero)-2.d0*bessel_jn(n,zero)+&
               &   bessel_jn(n+2,zero))
          zero = zero+dx
          IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT
       END DO
     END SUBROUTINE newton
   END FUNCTION root_bessjp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !!$  PURE FUNCTION BESSEL_JN(n,x) RESULT(bessj)
 !!$    DOUBLE PRECISION, INTENT(in) :: x
 !!$    DOUBLE PRECISION ::  bessj
 !!$    DOUBLE PRECISION BIGNO,BIGNI
 !!$    INTEGER n,IACC
 !!$    PARAMETER (IACC=40,BIGNO=1.d10,BIGNI=1.d-10)
 !!$    INTEGER j,jsum,m
 !!$    DOUBLE PRECISION ax,bj,bjm,bjp,sum,tox,bessj0,bessj1
 !!$    IF( n.EQ.0 ) THEN 
 !!$       bessj = bessj0(x)
 !!$       RETURN
 !!$    ELSE IF( n.EQ.1 ) THEN 
 !!$       bessj = bessj1(x)
 !!$       RETURN
 !!$    ENDIF
 !!$    ax=ABS(x)
 !!$    IF(ax.EQ.0.d0)THEN
 !!$       bessj=0.d0
 !!$    ELSE IF(ax.GT.float(n))THEN
 !!$       tox=2./ax
 !!$       bjm=bessj0(ax)
 !!$       bj=bessj1(ax)
 !!$       DO j=1,n-1
 !!$          bjp=j*tox*bj-bjm
 !!$          bjm=bj
 !!$          bj=bjp
 !!$       END DO
 !!$       bessj=bj
 !!$    ELSE
 !!$       tox=2./ax
 !!$       m=2*((n+INT(SQRT(float(IACC*n))))/2)
 !!$       bessj=0.d0
 !!$       jsum=0
 !!$       sum=0.d0
 !!$       bjp=0.d0
 !!$       bj=1.
 !!$       DO j=m,1,-1
 !!$          bjm=j*tox*bj-bjp
 !!$          bjp=bj
 !!$          bj=bjm
 !!$          IF(ABS(bj).GT.BIGNO)THEN
 !!$             bj=bj*BIGNI
 !!$             bjp=bjp*BIGNI
 !!$             bessj=bessj*BIGNI
 !!$             sum=sum*BIGNI
 !!$          ENDIF
 !!$          IF(jsum.NE.0)sum=sum+bj
 !!$          jsum=1-jsum
 !!$          IF(j.EQ.n)bessj=bjp
 !!$       END DO
 !!$       sum=2.*sum-bj
 !!$       bessj=bessj/sum
 !!$    ENDIF
 !!$    IF(x.LT.0.d0.AND.MOD(n,2).EQ.1)bessj=-bessj
 !!$    RETURN
 !!$  END FUNCTION bessel_jn
 !!$
 !!$  PURE FUNCTION bessj0(x)
 !!$    DOUBLE PRECISION, INTENT(in) :: x
 !!$    DOUBLE PRECISION bessj0
 !!$    DOUBLE PRECISION ax,xx,z
 !!$    DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y
 !!$    SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6
 !!$    DATA p1,p2,p3,p4,p5/1.d0,-.1098628627d-2,.2734510407d-4,-.2073370639d-5,.2093887211d-6/
 !!$    DATA q1,q2,q3,q4,q5/-.1562499995d-1,.1430488765d-3,-.6911147651d-5,.7621095161d-6,-.934945152d-7/
 !!$    DATA r1,r2,r3,r4,r5,r6/57568490574.d0,-13362590354.d0,651619640.7d0,-11214424.18d0,&
 !!$         &   77392.33017d0,-184.9052456d0/
 !!$    DATA s1,s2,s3,s4,s5,s6/57568490411.d0,1029532985.d0,9494680.718d0,59272.64853d0,267.8532712d0,1.d0/
 !!$    IF(ABS(x).LT.8.)THEN
 !!$       y=x**2
 !!$       bessj0=(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))
 !!$    ELSE
 !!$       ax=ABS(x)
 !!$       z=8./ax
 !!$       y=z**2
 !!$       xx=ax-.785398164
 !!$       bessj0=SQRT(.636619772/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))-z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5)))))
 !!$    ENDIF
 !!$    RETURN
 !!$  END FUNCTION bessj0
 !!$
 !!$  PURE FUNCTION bessj1(x)
 !!$    DOUBLE PRECISION, INTENT(in) :: x
 !!$    DOUBLE PRECISION bessj1
 !!$    DOUBLE PRECISION ax,xx,z
 !!$    DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y
 !!$    SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6
 !!$    DATA r1,r2,r3,r4,r5,r6/72362614232.d0,-7895059235.d0,242396853.1d0,-2972611.439d0,15704.48260d0,-30.16036606d0/
 !!$    DATA s1,s2,s3,s4,s5,s6/144725228442.d0,2300535178.d0,18583304.74d0,99447.43394d0,376.9991397d0,1.d0/
 !!$    DATA p1,p2,p3,p4,p5/1.d0,.183105d-2,-.3516396496d-4,.2457520174d-5,-.240337019d-6/
 !!$    DATA q1,q2,q3,q4,q5/.04687499995d0,-.2002690873d-3,.8449199096d-5,-.88228987d-6,.105787412d-6/
 !!$    IF(ABS(x).LT.8.)THEN
 !!$       y=x**2
 !!$       bessj1=x*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))
 !!$    ELSE
 !!$       ax=ABS(x)
 !!$       z=8.d0/ax
 !!$       y=z**2
 !!$       xx=ax-2.356194491d0
 !!$       bessj1=SQRT(.636619772d0/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))- &
 !!$            & z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5)))))*SIGN(1.d0,x)
 !!$    ENDIF
 !!$    RETURN
 !!$  END FUNCTION bessj1
 
 END MODULE math_util
diff --git a/src/matrix.f90 b/src/matrix.f90
index 22ea822..fc8372e 100644
--- a/src/matrix.f90
+++ b/src/matrix.f90
@@ -1,3295 +1,3295 @@
 !>
 !> @file matrix.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE matrix
 !
 !    MATRIX: Simple interface to the direct solver LAPACK.
 !
 !    T.M. Tran, CRPP-EPFL
 !    February 2007
 !
   IMPLICIT NONE
 !
   TYPE gbmat   ! Lapack General Band matrix storage
      INTEGER :: kl, ku, rank
      INTEGER :: mrows, ncols
      INTEGER :: nterms, kmat
      INTEGER, DIMENSION(:), POINTER :: piv => null()
      DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null()
   END TYPE gbmat
 !
   TYPE gemat   ! Lapack General DENSE matrix storage
      INTEGER :: rank
      INTEGER :: mrows, ncols
      INTEGER :: nterms, kmat
      INTEGER, DIMENSION(:), POINTER :: piv => null()
      DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null()
   END TYPE gemat
 !
   TYPE pbmat   ! Lapack Pack Band matrix storage (super-diagonals)
      INTEGER :: ku, rank
      INTEGER :: nterms, kmat
      DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null()
   END TYPE pbmat
 !
   TYPE zgbmat   ! Lapack General Band matrix storage
      INTEGER :: kl, ku, rank
      INTEGER :: mrows, ncols
      INTEGER :: nterms, kmat
      INTEGER, DIMENSION(:), POINTER :: piv => null()
      DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => NULL()
   END TYPE zgbmat
 !
   TYPE zgemat   ! Lapack General DENSE matrix storage
      INTEGER :: rank
      INTEGER :: mrows, ncols
      INTEGER :: nterms, kmat
      INTEGER, DIMENSION(:), POINTER :: piv => null()
      DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null()
   END TYPE zgemat
 !
   TYPE zpbmat   ! Lapack Pack Band matrix storage (super-diagonals)
      INTEGER :: ku, rank
      INTEGER :: nterms, kmat
      DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null()
   END TYPE zpbmat
 !
   TYPE periodic_mat
     TYPE(gbmat) :: mat
     INTEGER :: nterms
     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: &
          &  matu => null(), &
          &  matvt => null()
   END TYPE periodic_mat
 !
   TYPE zperiodic_mat
     TYPE(zgbmat) :: mat
     INTEGER :: nterms
     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: &
          &  matu => null(), &
          &  matvt => null()
  END TYPE zperiodic_mat
 !
 !--------------------------------------------------------------------------------
   INTERFACE init
      MODULE PROCEDURE init_gb,  init_ge,  init_pb, &
           &           init_zgb, init_zge, init_zpb, &
           &           init_periodic, init_zperiodic
   END INTERFACE
   INTERFACE getvalp
      MODULE PROCEDURE getvalp_gb,  getvalp_ge,  getvalp_pb, &
           &           getvalp_zgb, getvalp_zge, getvalp_zpb
   END INTERFACE
   INTERFACE mcopy
      MODULE PROCEDURE mcopy_gb,  mcopy_ge,  mcopy_pb, &
           &           mcopy_zgb, mcopy_zge, mcopy_zpb, &
           &           mcopy_periodic, mcopy_zperiodic
   END INTERFACE
   INTERFACE maddto
      MODULE PROCEDURE maddto_gb,  maddto_ge,  maddto_pb, &
           &           maddto_zgb, maddto_zge, maddto_zpb, &
           &           maddto_periodic, maddto_zperiodic
   END INTERFACE
   INTERFACE destroy
      MODULE PROCEDURE destroy_gb,  destroy_ge,  destroy_pb,  &
           &           destroy_zgb, destroy_zge, destroy_zpb, &
           &           destroy_periodic, destroy_zperiodic
   END INTERFACE
   INTERFACE updtmat
      MODULE PROCEDURE updt_gb,  updt_ge,  updt_pb, &
           &           updt_zgb, updt_zpb, &
           &           updt_periodic, updt_zperiodic
   END INTERFACE
   INTERFACE getele
      MODULE PROCEDURE getele_gb,  getele_pb, &
           &           getele_zgb, getele_zpb, &
           &           getele_periodic, getele_zperiodic
   END INTERFACE
   INTERFACE putele
      MODULE PROCEDURE putele_gb,  putele_pb, &
           &           putele_zgb, putele_zpb, &
           &           putele_periodic, putele_zperiodic
   END INTERFACE
   INTERFACE getcol
      MODULE PROCEDURE getcol_gb,  getcol_pb, &
           &           getcol_zgb, getcol_zpb, &
           &           getcol_periodic, getcol_zperiodic
   END INTERFACE
   INTERFACE getrow
      MODULE PROCEDURE getrow_gb,  getrow_ge,  getrow_pb, &
           &           getrow_zgb, getrow_zpb, &
           &           getrow_periodic, getrow_zperiodic
   END INTERFACE
   INTERFACE putcol
      MODULE PROCEDURE putcol_gb,  putcol_ge,  putcol_pb, &
           &           putcol_zgb, putcol_zpb, &
           &           putcol_periodic, putcol_zperiodic
   END INTERFACE
   INTERFACE putrow
      MODULE PROCEDURE putrow_gb,   putrow_ge,  putrow_pb, &
           &           putrow_zgb, putrow_zpb, &
           &           putrow_periodic, putrow_zperiodic
   END INTERFACE
   INTERFACE factor
      MODULE PROCEDURE factor_gb,  factor_ge,  factor_pb, &
           &           factor_zgb, factor_zge, factor_zpb, &
           &           factor_periodic, factor_zperiodic
   END INTERFACE
   INTERFACE bsolve
      MODULE PROCEDURE bsolve_gb1,  bsolve_gbn,  bsolve_ge1,  bsolve_gen, &
           &           bsolve_pb1,  bsolve_pbn, &
           &           bsolve_periodic1, bsolve_periodicn, &
           &           bsolve_zperiodic1, bsolve_zperiodicn, &
           &           bsolve_zgb1, bsolve_zgbn, bsolve_zge1, bsolve_zgen, &
           &           bsolve_zpb1, bsolve_zpbn
   END INTERFACE
   INTERFACE vmx
      MODULE PROCEDURE vmx_gb,  vmx_gbn,  vmx_pb,  vmx_pbn, &
           &           vmx_zgb, vmx_zgbn, vmx_zpb, vmx_zpbn, &
           &           vmx_ge,  vmx_gen,  vmx_zge, vmx_zgen, &
           &           vmx_periodic, vmx_zperiodic
   END INTERFACE
   INTERFACE determinant
      MODULE PROCEDURE determinant_ge,  determinant_gb,  determinant_pb, &
           &           determinant_zge, determinant_zgb, determinant_zpb
   END INTERFACE
   INTERFACE putmat
      MODULE PROCEDURE putmat_gb
   END INTERFACE
   INTERFACE  getmat
      MODULE PROCEDURE getmat_gb
   END INTERFACE
   INTERFACE kron
      MODULE PROCEDURE kron_ge
   END INTERFACE kron
 !
 CONTAINS
 !===========================================================================
   SUBROUTINE init_ge(n, nterms, mat, kmat, mrows)
 !
 !  Initialize Lapack General Dense matrice
 !
     INTEGER, INTENT(in) :: n, nterms
     INTEGER, OPTIONAL :: kmat
     INTEGER, OPTIONAL :: mrows
     TYPE(gemat) :: mat
 !
     mat%ncols = n
     mat%mrows = n
     IF(PRESENT(mrows)) THEN
        mat%mrows = mrows
     END IF
     mat%rank = n   ! Warning: ok if square matrix
     mat%nterms = nterms
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
     ALLOCATE(mat%val(mat%mrows,mat%ncols))
     ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols)))
     mat%val = 0.0d0
     mat%piv = 0
   END SUBROUTINE init_ge
 !===========================================================================
   SUBROUTINE init_gb(kl, ku, n, nterms, mat, kmat, mrows)
 !
 !  Initialize Lapack General Banded matrice
 !
     INTEGER, INTENT(in) :: kl, ku, n, nterms
     INTEGER, OPTIONAL :: kmat
     INTEGER, OPTIONAL :: mrows
     TYPE(gbmat) :: mat
     INTEGER :: lda
 !
     mat%kl = kl
     mat%ku = ku
     mat%ncols = n
     mat%mrows = n
     IF(PRESENT(mrows)) THEN
        mat%mrows = mrows
     END IF
     mat%rank = n   ! Warning: ok if square matrix
     mat%nterms = nterms
     IF(PRESENT(kmat)) mat%kmat = kmat
     lda = 2*kl + ku + 1
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
     ALLOCATE(mat%val(lda,n))
     ALLOCATE(mat%piv(n))
     mat%val = 0.0d0
     mat%piv = 0
   END SUBROUTINE init_gb
 !===========================================================================
   SUBROUTINE init_periodic(kl, ku, n, nterms, mat, kmat)
 !
 !  Initialize Lapack Periodic General Banded matrice
 !
     INTEGER, INTENT(in) :: kl, ku, n, nterms
     INTEGER, OPTIONAL :: kmat
     TYPE(periodic_mat) :: mat
     INTEGER :: i,j
     DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0
 !
 ! In band matrix matp%mat is a GB matrix
     IF( PRESENT(kmat))  THEN
        CALL init(kl, ku, n, nterms, mat%mat, kmat)
     ELSE
        CALL init(kl, ku, n, nterms, mat%mat)
     END IF
     mat%nterms = nterms
 !
 !  Off band matrices
     IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
     IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
     ALLOCATE(mat%matu(n, kl+ku))
     ALLOCATE(mat%matvt(kl+ku,n))
 !
     mat%matu = zero
     mat%matvt = zero                 !    kl=3,  ku=2
     DO j=1,kl                        !   [ 1 0 0 . . ]
        mat%matu(j,j) = one           !   [ 0 1 0 . . ]
     END DO                           !   [ 0 0 1 . . ]
 !                                    !   [ 0 . . . . ]
     DO j=1,ku                        !   [ . . . . . ]
        i=n-ku+j                      !   [ . . . 1 0 ]
        mat%matu(i,ku+j) = one        !   [ . . . 0 1 ] 
     END DO
   END SUBROUTINE init_periodic
 !===========================================================================
   SUBROUTINE init_zperiodic(kl, ku, n, nterms, mat, kmat)
 !
 !  Initialize Lapack Periodic General Banded matrice
 !
     INTEGER, INTENT(in) :: kl, ku, n, nterms
     INTEGER, OPTIONAL :: kmat
     TYPE(zperiodic_mat) :: mat
     INTEGER :: i,j
     DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0
 !
 ! In band matrix matp%mat is a GB matrix
     IF( PRESENT(kmat))  THEN
        CALL init(kl, ku, n, nterms, mat%mat, kmat)
     ELSE
        CALL init(kl, ku, n, nterms, mat%mat)
     END IF
     mat%nterms = nterms
 !
 !  Off band matrices
     IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
     IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
     ALLOCATE(mat%matu(n, kl+ku))
     ALLOCATE(mat%matvt(kl+ku,n))
 !
     mat%matu = zero
     mat%matvt = zero                 !    kl=3,  ku=2
     DO j=1,kl                        !   [ 1 0 0 . . ]
        mat%matu(j,j) = one           !   [ 0 1 0 . . ]
     END DO                           !   [ 0 0 1 . . ]
 !                                    !   [ 0 . . . . ]
     DO j=1,ku                        !   [ . . . . . ]
        i=n-ku+j                      !   [ . . . 1 0 ]
        mat%matu(i,ku+j) = one        !   [ . . . 0 1 ] 
     END DO
   END SUBROUTINE init_zperiodic
 !===========================================================================
   SUBROUTINE init_pb(ku, n, nterms, mat, kmat)
 !
 !  Initialize Lapack Packed Banded matrice
 !
     INTEGER, INTENT(in) :: ku, n, nterms
     INTEGER, OPTIONAL :: kmat
     TYPE(pbmat) :: mat
     INTEGER :: lda
 !
     mat%ku = ku
     mat%rank = n
     mat%nterms = nterms
     IF(PRESENT(kmat)) mat%kmat = kmat
     lda = ku + 1
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(lda,n))
     mat%val = 0.0d0
   END SUBROUTINE init_pb
 !===========================================================================
   SUBROUTINE init_zge(n, nterms, mat, kmat, mrows)
 !
 !  Initialize Lapack General Dense matrice
 !
     INTEGER, INTENT(in) :: n, nterms
     INTEGER, OPTIONAL :: kmat
     INTEGER, OPTIONAL :: mrows
     TYPE(zgemat) :: mat
 !
     mat%ncols = n
     mat%mrows = n
     IF(PRESENT(mrows)) THEN
        mat%mrows = mrows
     END IF
     mat%rank = n
     mat%nterms = nterms
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
     ALLOCATE(mat%val(mat%mrows,mat%ncols))
     ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols)))
     mat%val = 0.0d0
     mat%piv = 0
   END SUBROUTINE init_zge
 !===========================================================================
   SUBROUTINE init_zgb(kl, ku, n, nterms, mat, kmat, mrows)
 !
 !  Initialize Lapack General Banded matrice
 !
     INTEGER, INTENT(in) :: kl, ku, n, nterms
     INTEGER, OPTIONAL :: kmat
     INTEGER, OPTIONAL :: mrows
     TYPE(zgbmat) :: mat
     INTEGER :: lda
 !
     mat%kl = kl
     mat%ku = ku
     mat%ncols = n
     mat%mrows = n
     IF(PRESENT(mrows)) THEN
        mat%mrows = mrows
     END IF
     mat%rank = n   ! Warning: ok if square matrix
     mat%nterms = nterms
     IF(PRESENT(kmat)) mat%kmat = kmat
     lda = 2*kl + ku + 1
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
     ALLOCATE(mat%val(lda,n))
     ALLOCATE(mat%piv(n))
     mat%val = 0.0d0
     mat%piv = 0
   END SUBROUTINE init_zgb
 !===========================================================================
   SUBROUTINE init_zpb(ku, n, nterms, mat, kmat)
 !
 !  Initialize Lapack Packed Banded matrice
 !
     INTEGER, INTENT(in) :: ku, n, nterms
      INTEGER, OPTIONAL :: kmat
    TYPE(zpbmat) :: mat
     INTEGER :: lda
 !
     mat%ku = ku
     mat%rank = n
     mat%nterms = nterms
     IF(PRESENT(kmat)) mat%kmat = kmat
     lda = ku + 1
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(lda,n))
     mat%val = 0.0d0
   END SUBROUTINE init_zpb
 !===========================================================================
   SUBROUTINE mcopy_ge(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(gemat) :: mata, matb
 !
     matb%rank   = mata%rank
     matb%mrows  = mata%mrows
     matb%ncols  = mata%ncols
     matb%nterms = mata%nterms
     matb%kmat   = mata%kmat
     IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
     ALLOCATE(matb%val(matb%mrows,matb%ncols))
     ALLOCATE(matb%piv(MIN(matb%mrows,matb%ncols)))
     matb%val = mata%val
     matb%piv = mata%piv
   END SUBROUTINE mcopy_ge
 !===========================================================================
   SUBROUTINE mcopy_gb(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(gbmat) :: mata, matb
     INTEGER :: n, lda
 !
     n = mata%rank
     matb%kl = mata%kl
     matb%ku = mata%ku
     matb%rank   = mata%rank
     matb%mrows  = mata%mrows
     matb%ncols  = mata%ncols
     matb%nterms = mata%nterms
     matb%kmat   = mata%kmat
     lda = 2*mata%kl + mata%ku + 1
     IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
     ALLOCATE(matb%val(lda,n))
     ALLOCATE(matb%piv(n))
     matb%val = mata%val
     matb%piv = mata%piv
   END SUBROUTINE mcopy_gb
 !===========================================================================
   SUBROUTINE mcopy_periodic(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(periodic_mat) :: mata, matb
     INTEGER :: n, kl, ku
 !
     kl = mata%mat%kl
     ku = mata%mat%ku
     n  = mata%mat%rank
 !
     CALL mcopy(mata%mat, matb%mat)
     IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu)
     IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt)
     ALLOCATE(matb%matu(n,kl+ku))
     ALLOCATE(matb%matvt(kl+ku,n))
     matb%matu = mata%matu
     matb%matvt = mata%matvt
   END SUBROUTINE mcopy_periodic
 !===========================================================================
   SUBROUTINE mcopy_zperiodic(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(zperiodic_mat) :: mata, matb
     INTEGER :: n, kl, ku
 !
     kl = mata%mat%kl
     ku = mata%mat%ku
     n  = mata%mat%rank
 !
     CALL mcopy(mata%mat, matb%mat)
     IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu)
     IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt)
     ALLOCATE(matb%matu(n,kl+ku))
     ALLOCATE(matb%matvt(kl+ku,n))
     matb%matu = mata%matu
     matb%matvt = mata%matvt
   END SUBROUTINE mcopy_zperiodic
 !===========================================================================
   SUBROUTINE mcopy_pb(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(pbmat) :: mata, matb
     INTEGER :: n, lda
 !
     n = mata%rank
     matb%ku = mata%ku
     matb%rank = mata%rank
     matb%nterms = mata%nterms
     lda = mata%ku + 1
     IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     ALLOCATE(matb%val(lda,n))
     matb%val = mata%val
   END SUBROUTINE mcopy_pb
 !===========================================================================
   SUBROUTINE mcopy_zge(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(zgemat) :: mata, matb
     INTEGER :: n
 !
     n = mata%rank
     matb%rank = n
     IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
     ALLOCATE(matb%val(n,n))
     ALLOCATE(matb%piv(n))
     matb%val = mata%val
     matb%piv = mata%piv
   END SUBROUTINE mcopy_zge
 !===========================================================================
   SUBROUTINE mcopy_zgb(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(zgbmat) :: mata, matb
     INTEGER :: n, lda
 !
     n = mata%rank
     matb%kl = mata%kl
     matb%ku = mata%ku
     matb%rank = mata%rank
     matb%nterms = mata%nterms
     lda = 2*mata%kl + mata%ku + 1
     IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
     ALLOCATE(matb%val(lda,n))
     ALLOCATE(matb%piv(n))
     matb%val = mata%val
     matb%piv = mata%piv
   END SUBROUTINE mcopy_zgb
 !===========================================================================
   SUBROUTINE mcopy_zpb(mata, matb)
 !
 !  Matrix copy: B = A
 !
     TYPE(zpbmat) :: mata, matb
     INTEGER :: n, lda
 !
     n = mata%rank
     matb%ku = mata%ku
     matb%rank = mata%rank
     matb%nterms = mata%nterms
     lda = mata%ku + 1
     IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     ALLOCATE(matb%val(lda,n))
     matb%val = mata%val
   END SUBROUTINE mcopy_zpb
 !===========================================================================
   SUBROUTINE maddto_ge(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(gemat) :: mata, matb
     DOUBLE PRECISION :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_ge
 !===========================================================================
   SUBROUTINE maddto_gb(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(gbmat) :: mata, matb
     DOUBLE PRECISION :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_gb
 !===========================================================================
   SUBROUTINE maddto_periodic(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(periodic_mat) :: mata, matb
     DOUBLE PRECISION :: alpha
 !
     mata%mat%val = mata%mat%val + alpha*matb%mat%val
     mata%matvt   = mata%matvt + alpha*matb%matvt
   END SUBROUTINE maddto_periodic
 !===========================================================================
   SUBROUTINE maddto_zperiodic(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zperiodic_mat) :: mata, matb
     DOUBLE COMPLEX :: alpha
 !
     mata%mat%val = mata%mat%val + alpha*matb%mat%val
     mata%matvt   = mata%matvt + alpha*matb%matvt
   END SUBROUTINE maddto_zperiodic
 !===========================================================================
   SUBROUTINE maddto_pb(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(pbmat) :: mata, matb
     DOUBLE PRECISION :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_pb
 !===========================================================================
   SUBROUTINE maddto_zge(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zgemat) :: mata, matb
     DOUBLE COMPLEX :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zge
 !===========================================================================
   SUBROUTINE maddto_zgb(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zgbmat) :: mata, matb
     DOUBLE COMPLEX :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zgb
 !===========================================================================
   SUBROUTINE maddto_zpb(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zpbmat) :: mata, matb
     DOUBLE COMPLEX :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zpb
 !===========================================================================
   SUBROUTINE getvalp_ge(mat, p)
 !
 !  Get pointer to matrix coefficients
 !
     TYPE(gemat) :: mat
     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
 !
     p => mat%val
   END SUBROUTINE getvalp_ge
 !===========================================================================
   SUBROUTINE getvalp_gb(mat, p)
 !
 !  Get pointer to matrix coefficients
 !
     TYPE(gbmat) :: mat
     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
 !
     p => mat%val
   END SUBROUTINE getvalp_gb
 !===========================================================================
   SUBROUTINE getvalp_pb(mat, p)
 !
 !  Get pointer to matrix coefficients
 !
     TYPE(pbmat) :: mat
     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
 !
     p => mat%val
   END SUBROUTINE getvalp_pb
 !===========================================================================
   SUBROUTINE getvalp_zge(mat, p)
 !
 !  Get pointer to matrix coefficients
 !
     TYPE(zgemat) :: mat
     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p
 !
     p => mat%val
   END SUBROUTINE getvalp_zge
 !===========================================================================
   SUBROUTINE getvalp_zgb(mat, p)
 !
 !  Get pointer to matrix coefficients
 !
     TYPE(zgbmat) :: mat
     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p
 !
     p => mat%val
   END SUBROUTINE getvalp_zgb
 !===========================================================================
   SUBROUTINE getvalp_zpb(mat, p)
 !
 !  Get pointer to matrix coefficients
 !
     TYPE(zpbmat) :: mat
     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p
 !
     p => mat%val
   END SUBROUTINE getvalp_zpb
 !===========================================================================
   SUBROUTINE destroy_gb(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(gbmat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
   END SUBROUTINE destroy_gb
 !===========================================================================
   SUBROUTINE destroy_ge(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(gemat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
   END SUBROUTINE destroy_ge
 !===========================================================================
   SUBROUTINE destroy_pb(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(pbmat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
   END SUBROUTINE destroy_pb
 !===========================================================================
   SUBROUTINE destroy_zgb(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(zgbmat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
   END SUBROUTINE destroy_zgb
 !===========================================================================
   SUBROUTINE destroy_zge(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(zgemat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
   END SUBROUTINE destroy_zge
 !===========================================================================
   SUBROUTINE destroy_zpb(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(zpbmat) :: mat
     IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
   END SUBROUTINE destroy_zpb
 !===========================================================================
   SUBROUTINE destroy_periodic(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(periodic_mat) :: mat
     IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
     IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
     CALL destroy(mat%mat)
   END SUBROUTINE destroy_periodic
 !===========================================================================
   SUBROUTINE destroy_zperiodic(mat)
 !
 !   Deallocate pointers in mat
 !
     TYPE(zperiodic_mat) :: mat
     IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
     IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
     CALL destroy(mat%mat)
   END SUBROUTINE destroy_zperiodic
 !===========================================================================
   SUBROUTINE updt_gb(mat, i, j, val)
 !
 !  Update element Aij into banded matrix
 !
     TYPE(gbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     ib = mat%kl + mat%ku + i - j + 1
     IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
        WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(ib,j) = mat%val(ib,j) + val    
   END SUBROUTINE updt_gb
 !===========================================================================
   SUBROUTINE updt_ge(mat, i, j, val)
 !
 !  Update element Aij into banded matrix
 !
     TYPE(gemat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
 !
     IF( (i .GT. mat%mrows) .OR. (j .GT. mat%ncols) .OR. (j .LT. 1) .OR. (i.LT.1)) THEN
        WRITE(*,*) 'UPDT: i, j out of range ', i, j, mat%mrows, mat%ncols
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(i,j) = mat%val(i,j) + val    
   END SUBROUTINE updt_ge
 !===========================================================================
   SUBROUTINE updt_periodic(mat, i, j, val)
 !
 !  Update element Aij into periodic banded matrix
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: n, kl, ku
 !
     n = mat%mat%rank
     kl = mat%mat%kl
     ku = mat%mat%ku
 !
     IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
 ! 
 ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
        mat%matvt(i,j) = mat%matvt(i,j) + val
 !
     ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
 !
 ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
        mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val
 !
     ELSE
 !
 ! Put into the banded matrix
        CALL updtmat(mat%mat, i, j, val)
 !
     END IF
   END SUBROUTINE updt_periodic
 !===========================================================================
   SUBROUTINE updt_zperiodic(mat, i, j, val)
 !
 !  Update element Aij into periodic banded matrix
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: n, kl, ku
 !
     n = mat%mat%rank
     kl = mat%mat%kl
     ku = mat%mat%ku
 !
     IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
 ! 
 ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
        mat%matvt(i,j) = mat%matvt(i,j) + val
 !
     ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
 !
 ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
        mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val
 !
     ELSE
 !
 ! Put into the banded matrix
        CALL updtmat(mat%mat, i, j, val)
 !
     END IF
   END SUBROUTINE updt_zperiodic
 !===========================================================================
   SUBROUTINE updt_pb(mat, i, j, val)
 !
 !  Update element Aij into banded matrix
 !
     TYPE(pbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     IF( i .LE. j ) THEN
        ib = mat%ku + i - j + 1
        IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
           WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        mat%val(ib,j) = mat%val(ib,j) + val
     END IF
   END SUBROUTINE updt_pb
 !===========================================================================
   SUBROUTINE updt_zgb(mat, i, j, val)
 !
 !  Update element Aij into banded matrix
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     ib = mat%kl + mat%ku + i - j + 1
     IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
        WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(ib,j) = mat%val(ib,j) + val    
   END SUBROUTINE updt_zgb
 !===========================================================================
   SUBROUTINE updt_zpb(mat, i, j, val)
 !
 !  Update element Aij into banded matrix
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     IF( i .LE. j ) THEN
        ib = mat%ku + i - j + 1
        IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
           WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        mat%val(ib,j) = mat%val(ib,j) + val
     END IF
   END SUBROUTINE updt_zpb
 !===========================================================================
   SUBROUTINE getele_gb(mat, i, j, val)
 !
 !  Get element (i,j) of matrix
 !
     TYPE(gbmat), INTENT(in) :: mat
     DOUBLE PRECISION, INTENT (OUT) :: val
     INTEGER, INTENT (IN) :: i, j
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     ib = mat%kl + mat%ku + i - j + 1
     IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
        WRITE(*,*) 'GETELE: i, j out of range ', i, j
        STOP '*** Abnormal EXIT in MODULE matrix***'
     END IF
     val = mat%val(ib,j)
   END SUBROUTINE getele_gb
 !===========================================================================
   SUBROUTINE getele_periodic(mat, i, j, val)
 !
 !  Get element Aij of periodic banded matrix
 !
     TYPE(periodic_mat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: n, kl, ku
 !
     n = mat%mat%rank
     kl = mat%mat%kl
     ku = mat%mat%ku
 !
     IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
 ! 
 ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
        val =mat%matvt(i,j)
 !
     ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
 !
 ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
        val = mat%matvt(i-n+kl+ku,j)
 !
     ELSE
 !
 ! Put into the banded matrix
        CALL getele(mat%mat, i, j, val)
 !
     END IF
   END SUBROUTINE getele_periodic
 !===========================================================================
   SUBROUTINE getele_zperiodic(mat, i, j, val)
 !
 !  Get element Aij of periodic banded matrix
 !
     TYPE(zperiodic_mat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE COMPLEX, INTENT(out) :: val
     INTEGER :: n, kl, ku
 !
     n = mat%mat%rank
     kl = mat%mat%kl
     ku = mat%mat%ku
 !
     IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
 ! 
 ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
        val =mat%matvt(i,j)
 !
     ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
 !
 ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
        val = mat%matvt(i-n+kl+ku,j)
 !
     ELSE
 !
 ! Put into the banded matrix
        CALL getele(mat%mat, i, j, val)
 !
     END IF
   END SUBROUTINE getele_zperiodic
 !===========================================================================
   SUBROUTINE getele_pb(mat, i, j, val)
 !
 !  Get element (i,j) of matrix
 !
     TYPE(pbmat), INTENT(in) :: mat
     DOUBLE PRECISION, INTENT (OUT) :: val
     INTEGER, INTENT (IN) :: i, j
     INTEGER :: lda, n, ib, irow, jcol
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     IF( i .LE. j ) THEN   ! Upper triangular matrix
        irow = i; jcol = j
     ELSE                  ! Lower triangular matrix
        irow = j; jcol = i
     END IF 
     ib = mat%ku + irow - jcol + 1
     IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
        WRITE(*,*) 'GETELE: i, j out of range ', i, j
        STOP '*** Abnormal EXIT in MODULE matrix***'
     END IF
     val = mat%val(ib,jcol)
   END SUBROUTINE getele_pb
 !===========================================================================
   SUBROUTINE getele_zgb(mat, i, j, val)
 !
 !  Get element (i,j) of matrix
 !
     TYPE(zgbmat), INTENT(in) :: mat
     DOUBLE COMPLEX, INTENT (OUT) :: val
     INTEGER, INTENT (IN) :: i, j
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     ib = mat%kl + mat%ku + i - j + 1
     IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
        WRITE(*,*) 'GETELE: i, j out of range ', i, j
        STOP '*** Abnormal EXIT in MODULE matrix***'
     END IF
     val = mat%val(ib,j)
   END SUBROUTINE getele_zgb
 !===========================================================================
   SUBROUTINE getele_zpb(mat, i, j, val)
 !
 !  Get element (i,j) of matrix
 !
     TYPE(zpbmat), INTENT(in) :: mat
     DOUBLE COMPLEX, INTENT (OUT) :: val
     INTEGER, INTENT (IN) :: i, j
     INTEGER :: lda, n, ib, irow, jcol
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
 !
     IF(  i .LE. j ) THEN   ! Upper triangular matrix
        irow = i; jcol = j
        ib = mat%ku + irow - jcol + 1
        IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
           WRITE(*,*) 'GETELE: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE matrix***'
        END IF
        val = mat%val(ib,jcol)
        RETURN
     ELSE                  ! Lower triangular matrix
        irow = j; jcol = i
        ib = mat%ku + irow - jcol + 1
        IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
           WRITE(*,*) 'GETELE: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE matrix***'
        END IF
        val = CONJG(mat%val(ib,jcol))
     END IF 
   END SUBROUTINE getele_zpb
 !===========================================================================
   SUBROUTINE putele_gb(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(gbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, INTENT (in) :: val
     INTEGER, INTENT (in) :: i, j
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     ib = mat%kl + mat%ku + i - j + 1
     IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
        WRITE(*,*) 'GETELE: i, j out of range ', i, j
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(ib,j) = val
   END SUBROUTINE putele_gb
 !===========================================================================
   SUBROUTINE putele_periodic(mat, i, j, val)
 !
 !  Put element Aij into periodic banded matrix
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: n, kl, ku
 !
     n = mat%mat%rank
     kl = mat%mat%kl
     ku = mat%mat%ku
 !
     IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
 ! 
 ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
        mat%matvt(i,j) = val
 !
     ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
 !
 ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
        mat%matvt(i-n+kl+ku,j) = val
 !
     ELSE
 !
 ! Put into the banded matrix
        CALL putele(mat%mat, i, j, val)
 !
     END IF
   END SUBROUTINE putele_periodic
 !===========================================================================
   SUBROUTINE putele_zperiodic(mat, i, j, val)
 !
 !  Put element Aij into periodic banded matrix
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: n, kl, ku
 !
     n = mat%mat%rank
     kl = mat%mat%kl
     ku = mat%mat%ku
 !
     IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
 ! 
 ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
        mat%matvt(i,j) = val
 !
     ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
 !
 ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
        mat%matvt(i-n+kl+ku,j) = val
 !
     ELSE
 !
 ! Put into the banded matrix
        CALL putele(mat%mat, i, j, val)
 !
     END IF
   END SUBROUTINE putele_zperiodic
 !===========================================================================
   SUBROUTINE putele_pb(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(pbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, INTENT (in) :: val
     INTEGER, INTENT (IN) :: i, j
     INTEGER :: lda, n, ib, irow, jcol
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     IF( i .LE. j ) THEN   ! Upper triangular matrix
        irow = i; jcol = j
     ELSE                  ! Lower triangular matrix
        irow = j; jcol = i
     END IF 
     ib = mat%ku + irow - jcol + 1
     IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
        WRITE(*,*) 'PUTELE: i, j out of range ', i, j
        STOP '*** Abnormal EXIT in MODULE matrix***'
     END IF
     mat%val(ib,jcol) = val
   END SUBROUTINE putele_pb
 !===========================================================================
   SUBROUTINE putele_zgb(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     DOUBLE COMPLEX, INTENT (in) :: val
     INTEGER, INTENT (in) :: i, j
     INTEGER :: lda, n, ib
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     ib = mat%kl + mat%ku + i - j + 1
     IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
        WRITE(*,*) 'GETELE: i, j out of range ', i, j
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(ib,j) = val
   END SUBROUTINE putele_zgb
 !===========================================================================
   SUBROUTINE putele_zpb(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     DOUBLE COMPLEX, INTENT (in) :: val
     INTEGER, INTENT (IN) :: i, j
     INTEGER :: lda, n, ib, irow, jcol
 !
     lda = SIZE(mat%val, 1)
     n = mat%rank
     IF( i .LE. j ) THEN   ! Upper triangular matrix
        irow = i; jcol = j
        ib = mat%ku + irow - jcol + 1
        IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
           WRITE(*,*) 'GETELE: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE matrix***'
        END IF
        mat%val(ib,jcol) = val
     ELSE                  ! Lower triangular matrix
        irow = j; jcol = i
        ib = mat%ku + irow - jcol + 1
        IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
           WRITE(*,*) 'PUTELE: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE matrix***'
        END IF
        mat%val(ib,jcol) = CONJG(val)
     END IF 
   END SUBROUTINE putele_zpb
 !===========================================================================
   SUBROUTINE getcol_gb(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(gbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: m, kl, ku
     INTEGER :: ibmin, ibmax, imin, imax
 !
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     IF( SIZE(arr) .LT. m ) THEN
        WRITE(*,*) 'GETCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:m) = 0.0d0
     imin = MAX(1,j-ku)
     imax = MIN(m, j+kl)
     ibmin =  kl+ku+imin-j+1
     ibmax =  kl+ku+imax-j+1
     arr(imin:imax) = mat%val(ibmin:ibmax,j)
   END SUBROUTINE getcol_gb
 !===========================================================================
   SUBROUTINE getcol_periodic(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(periodic_mat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL getcol(mat%mat, j, arr)
 !
     IF( j.GE.n-kl+1 ) THEN
        arr(1:kl) = mat%matvt(1:kl,j)
     ELSE IF( j.LE.ku ) THEN
        arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j)
     END IF
   END SUBROUTINE getcol_periodic
 !===========================================================================
   SUBROUTINE getcol_zperiodic(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(zperiodic_mat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL getcol(mat%mat, j, arr)
 !
     IF( j.GE.n-kl+1 ) THEN
        arr(1:kl) = mat%matvt(1:kl,j)
     ELSE IF( j.LE.ku ) THEN
        arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j)
     END IF
   END SUBROUTINE getcol_zperiodic
 !===========================================================================
   SUBROUTINE getcol_pb(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(pbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, ku
     INTEGER :: i, ib, ibmin, ibmax, imin, imax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
 !
     imin=MAX(1,j-ku); imax=j  ! The column in the upper diagonal part
     ibmin=ku+1+imin-j  ; ibmax=ku+1+imax-j
     arr(imin:imax) = mat%val(ibmin:ibmax,j)
 !
     imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
     DO i=imin,imax
        ib = ku+1+j-i
        arr(i) = mat%val(ib,i)
     END DO
   END SUBROUTINE getcol_pb
 !===========================================================================
   SUBROUTINE getcol_zgb(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(zgbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: m, kl, ku
     INTEGER :: ibmin, ibmax, imin, imax
 !
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     IF( SIZE(arr) .LT. m ) THEN
        WRITE(*,*) 'GETCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:m) = 0.0d0
     imin = MAX(1,j-ku)
     imax = MIN(m, j+kl)
     ibmin =  kl+ku+imin-j+1
     ibmax =  kl+ku+imax-j+1
     arr(imin:imax) = mat%val(ibmin:ibmax,j)
   END SUBROUTINE getcol_zgb
 !===========================================================================
   SUBROUTINE getcol_zpb(mat, j, arr)
 !
 !   Get a column from matrix
 !
     TYPE(zpbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, ku
     INTEGER :: i, ib, ibmin, ibmax, imin, imax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
 !
     imin=MAX(1,j-ku); imax=j  ! The column in the upper diagonal part
     ibmin=ku+1+imin-j  ; ibmax=ku+1+imax-j
     arr(imin:imax) = mat%val(ibmin:ibmax,j)
 !
     imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
     DO i=imin,imax
        ib = ku+1+j-i
        arr(i) = CONJG(mat%val(ib,i))
     END DO
   END SUBROUTINE getcol_zpb
 !===========================================================================
   SUBROUTINE getrow_gb(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(gbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, kl, ku
     INTEGER :: j, ib, jmin, jmax
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
     jmin = MAX(1,i-kl)
     jmax = MIN(n, i+ku)
     DO j=jmin,jmax
        ib =  kl+ku+i-j+1
        arr(j) = mat%val(ib,j)
     END DO
   END SUBROUTINE getrow_gb
 !===========================================================================
   SUBROUTINE getrow_ge(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(gemat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n
 !
     n = mat%ncols
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = mat%val(i,1:n)
   END SUBROUTINE getrow_ge
 !===========================================================================
   SUBROUTINE getrow_periodic(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(periodic_mat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL getrow(mat%mat, i, arr)
 !
     IF( i.LE.kl ) THEN
        arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n)
     ELSE IF( i.GE.n-ku+1 ) THEN
        arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku)
     END IF
   END SUBROUTINE getrow_periodic
 !===========================================================================
   SUBROUTINE getrow_zperiodic(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(zperiodic_mat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL getrow(mat%mat, i, arr)
 !
     IF( i.LE.kl ) THEN
        arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n)
     ELSE IF( i.GE.n-ku+1 ) THEN
        arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku)
     END IF
   END SUBROUTINE getrow_zperiodic
 !===========================================================================
   SUBROUTINE getrow_pb(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(pbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, ku
     INTEGER :: j, ib, ibmin, ibmax, jmin, jmax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
 !
     jmin=i; jmax=MIN(n,i+ku)
     DO j=jmin,jmax
        ib=ku+1+i-j
        arr(j) = mat%val(ib,j)
     END DO
 !
     jmin=MAX(1,i-ku); jmax=i-1
     DO j=jmin,jmax
        ib=ku+1+j-i
        arr(j) = mat%val(ib,i)
     END DO
   END SUBROUTINE getrow_pb
 !===========================================================================
   SUBROUTINE getrow_zgb(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(zgbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, kl, ku
     INTEGER :: j, ib, jmin, jmax
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
     jmin = MAX(1,i-kl)
     jmax = MIN(n, i+ku)
     DO j=jmin,jmax
        ib =  kl+ku+i-j+1
        arr(j) = mat%val(ib,j)
     END DO
   END SUBROUTINE getrow_zgb
 !===========================================================================
   SUBROUTINE getrow_zpb(mat, i, arr)
 !
 !   Get a row from matrix
 !
     TYPE(zpbmat), INTENT(in) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
     INTEGER :: n, ku
     INTEGER :: j, ib, ibmin, ibmax, jmin, jmax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     arr(1:n) = 0.0d0
 !
     jmin=i; jmax=MIN(n,i+ku)
     DO j=jmin,jmax
        ib=ku+1+i-j
        arr(j) = mat%val(ib,j)
     END DO
 !
     jmin=MAX(1,i-ku); jmax=i-1
     DO j=jmin,jmax
        ib=ku+1+j-i
        arr(j) = CONJG(mat%val(ib,i))
     END DO
   END SUBROUTINE getrow_zpb
 !===========================================================================
   SUBROUTINE putcol_gb(mat, j, arr)
 !
 !   Put a column from matrix
 !
     TYPE(gbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: m, kl, ku
     INTEGER :: ibmin, ibmax, imin, imax
 !
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     IF( SIZE(arr) .LT. m ) THEN
        WRITE(*,*) 'PUTCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     imin = MAX(1,j-ku)
     imax = MIN(m, j+kl)
     ibmin =  kl+ku+imin-j+1
     ibmax =  kl+ku+imax-j+1
     mat%val(ibmin:ibmax,j) = arr(imin:imax)
   END SUBROUTINE putcol_gb
 !===========================================================================
   SUBROUTINE putcol_ge(mat, j, arr)
 !
 !   Put a column from matrix
 !
     TYPE(gemat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: m
 !
     m = mat%mrows
     IF( SIZE(arr) .LT. m ) THEN
        WRITE(*,*) 'PUTCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(:,j) = arr(:)
   END SUBROUTINE putcol_ge
 !===========================================================================
   SUBROUTINE putrow_periodic(mat, i, arr)
 !
 !   Put a row to matrix
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL putrow(mat%mat, i, arr)
 !
     IF( i.LE.kl ) THEN
        mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n)
     ELSE IF( i.GE.n-ku+1 ) THEN
        mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku)
     END IF
   END SUBROUTINE putrow_periodic
 !===========================================================================
   SUBROUTINE putrow_zperiodic(mat, i, arr)
 !
 !   Put a row to matrix
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL putrow(mat%mat, i, arr)
 !
     IF( i.LE.kl ) THEN
        mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n)
     ELSE IF( i.GE.n-ku+1 ) THEN
        mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku)
     END IF
   END SUBROUTINE putrow_zperiodic
 !===========================================================================
   SUBROUTINE putcol_periodic(mat, j, arr)
 !
 !   Put a column into matrix
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL putcol(mat%mat, j, arr)
 !
     IF( j.GE.n-kl+1 ) THEN
        mat%matvt(1:kl,j) = arr(1:kl)
     ELSE IF( j.LE.ku ) THEN
        mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n)
     END IF
   END SUBROUTINE putcol_periodic
 !===========================================================================
   SUBROUTINE putcol_zperiodic(mat, j, arr)
 !
 !   Put a column into matrix
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     CALL putcol(mat%mat, j, arr)
 !
     IF( j.GE.n-kl+1 ) THEN
        mat%matvt(1:kl,j) = arr(1:kl)
     ELSE IF( j.LE.ku ) THEN
        mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n)
     END IF
   END SUBROUTINE putcol_zperiodic
 !===========================================================================
   SUBROUTINE putcol_pb(mat, j, arr)
 !
 !   Put a column from matrix
 !
     TYPE(pbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, ku
     INTEGER :: i, ib, ibmin, ibmax, imin, imax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
     ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
     mat%val(ibmin:ibmax,j) = arr(imin:imax)
  !
     imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
     DO i=imin,imax
        ib = ku+1+j-i
        mat%val(ib,i) = arr(i)
     END DO
  END SUBROUTINE putcol_pb
 !===========================================================================
   SUBROUTINE putcol_zgb(mat, j, arr)
 !
 !   Put a column from matrix
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: m, kl, ku
     INTEGER :: ibmin, ibmax, imin, imax
 !
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     IF( SIZE(arr) .LT. m ) THEN
        WRITE(*,*) 'PUTCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     imin = MAX(1,j-ku)
     imax = MIN(m, j+kl)
     ibmin =  kl+ku+imin-j+1
     ibmax =  kl+ku+imax-j+1
     mat%val(ibmin:ibmax,j) = arr(imin:imax)
   END SUBROUTINE putcol_zgb
 !===========================================================================
   SUBROUTINE putcol_zpb(mat, j, arr)
 !
 !   Put a column from matrix
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: j
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, ku, i, ib
     INTEGER :: ibmin, ibmax, imin, imax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
 !
     imin=MAX(1,j-ku); imax=j  ! The column in the upper diagonal part
     ibmin=ku+1+imin-j  ; ibmax=ku+1+imax-j
     mat%val(ibmin:ibmax,j) = arr(imin:imax)
 !
     imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
     DO i=imin,imax
        ib = ku+1+j-i
        mat%val(ib,i) = CONJG(arr(i))
     END DO
   END SUBROUTINE putcol_zpb
 !===========================================================================
   SUBROUTINE putrow_gb(mat, i, arr)
 !
 !   Put a row from matrix
 !
     TYPE(gbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
     INTEGER :: j, ib, jmin, jmax
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'GETCOL: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     jmin = MAX(1,i-kl)
     jmax = MIN(n, i+ku)
     DO j=jmin,jmax
        ib =  kl+ku+i-j+1
        mat%val(ib,j) = arr(j)
     END DO
   END SUBROUTINE putrow_gb
 !===========================================================================
   SUBROUTINE putrow_ge(mat, i, arr)
 !
 !   Put a row from matrix
 !
     TYPE(gemat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
     INTEGER :: j, ib, jmin, jmax
 !
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     mat%val(i,:) = arr(:)
   END SUBROUTINE putrow_ge
 !===========================================================================
   SUBROUTINE putrow_pb(mat, i, arr)
 !
 !   Put a row from matrix
 !
     TYPE(pbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, ku
     INTEGER :: j, ib, jmin, jmax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     jmin = i
     jmax = MIN(n, i+ku)
     DO j=jmin,jmax
        ib =  ku+i-j+1
        mat%val(ib,j) = arr(j)
     END DO
 !
     jmin=MAX(1,i-ku); jmax=i-1
     DO j=jmin,jmax
        ib=ku+1+j-i
        mat%val(ib,i) = arr(j)
     END DO
   END SUBROUTINE putrow_pb
 !===========================================================================
   SUBROUTINE putrow_zgb(mat, i, arr)
 !
 !   Put a row from matrix
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, kl, ku
     INTEGER :: j, ib, jmin, jmax
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     jmin = MAX(1,i-kl)
     jmax = MIN(n, i+ku)
     DO j=jmin,jmax
        ib =  kl+ku+i-j+1
        mat%val(ib,j) = arr(j)
     END DO
   END SUBROUTINE putrow_zgb
 !===========================================================================
   SUBROUTINE putrow_zpb(mat, i, arr)
 !
 !   Put a row from matrix
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: i
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
     INTEGER :: n, ku
     INTEGER :: j, ib, jmin, jmax
 !
     ku = mat%ku
     n = mat%rank
     IF( SIZE(arr) .LT. n ) THEN
        WRITE(*,*) 'PUTROW: size of arr too small'
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     jmin = i
     jmax = MIN(n, i+ku)
     DO j=jmin,jmax
        ib =  ku+i-j+1
        mat%val(ib,j) = arr(j)
     END DO
 !
     jmin=MAX(1,i-ku); jmax=i-1
     DO j=jmin,jmax
        ib=ku+1+j-i
        mat%val(ib,i) = CONJG(arr(j))
     END DO
   END SUBROUTINE putrow_zpb
 !===========================================================================
   SUBROUTINE factor_gb(mat,flops)
 !
 !  Factor the matrix, using Lapack
 !
     TYPE(gbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
     INTEGER :: lda, n, m, kl, ku
     INTEGER :: info
     DOUBLE PRECISION :: dopgb
     EXTERNAL dopgb
 !
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     n = mat%ncols
     CALL dgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info)
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRF ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     IF( PRESENT(flops) ) THEN
        flops = dopgb('DGBTRF',m, n, kl, ku, mat%piv)
     END IF
   END SUBROUTINE factor_gb
 !===========================================================================
   SUBROUTINE factor_periodic(mat)
 !
 !  Factor the periodic GB matrix, using the 
 !  Sherman-Morrisson-Woodburry formula
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     TYPE(gemat) :: hmat
     DOUBLE PRECISION :: one=1.0d0
     INTEGER :: bandw, mr, nc, i
 !
     bandw = SIZE(mat%matvt,1)
 !
 ! Factor A
     CALL factor(mat%mat)
 !
     IF(bandw .EQ. 0 ) RETURN   !  No off band terms
 !
 ! U <-- A^(-1) * U
     CALL bsolve(mat%mat, mat%matu)
 !
 ! H <-- 1 + V^T * U
     mr = SIZE(mat%matvt, 1)
     nc = SIZE(mat%matvt, 2)
     CALL init(mr, 0, hmat)  ! hmat is initialized to 0!
     DO i=1,mr
        hmat%val(i,i) = one
     END DO
     CALL dgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, &
          &      mat%matu, nc, one, hmat%val, mr)
 !
 ! V^T <-- H^(-1) V^T
     CALL factor(hmat)
     CALL bsolve(hmat, mat%matvt)
     CALL destroy(hmat)
 !
   END SUBROUTINE factor_periodic
 !===========================================================================
   SUBROUTINE factor_zperiodic(mat)
 !
 !  Factor the periodic GB matrix, using the 
 !  Sherman-Morrisson-Woodburry formula
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     TYPE(zgemat) :: hmat
     DOUBLE COMPLEX :: one=1.0d0
     INTEGER :: bandw, mr, nc, i
 !
     bandw = SIZE(mat%matvt,1)
 !
 ! Factor A
     CALL factor(mat%mat)
 !
     IF(bandw .EQ. 0 ) RETURN   !  No off band terms
 !
 ! U <-- A^(-1) * U
     CALL bsolve(mat%mat, mat%matu)
 !
 ! H <-- 1 + V^T * U
     mr = SIZE(mat%matvt, 1)
     nc = SIZE(mat%matvt, 2)
     CALL init(mr, 0, hmat)  ! hmat is initialized to 0!
     DO i=1,mr
        hmat%val(i,i) = one
     END DO
     CALL zgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, &
          &      mat%matu, nc, one, hmat%val, mr)
 !
 ! V^T <-- H^(-1) V^T
     CALL factor(hmat)
     CALL bsolve(hmat, mat%matvt)
     CALL destroy(hmat)
 !
   END SUBROUTINE factor_zperiodic
 !===========================================================================
   SUBROUTINE factor_pb(mat,flops)
 !
 !  Factor the matrix, using Lapack
 !
     TYPE(pbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
     INTEGER :: lda, n, ku
     INTEGER :: info
     DOUBLE PRECISION :: dopla
     EXTERNAL dopla
 !
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
     CALL dpbtrf('U', n, ku, mat%val, lda, info)
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from PBTRF ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     IF( PRESENT(flops) ) THEN
        flops = dopla('DPBTRF', n, n, ku, ku, 1)
     END IF
   END SUBROUTINE factor_pb
 !===========================================================================
   SUBROUTINE factor_ge(mat,flops)
 !
 !  Factor the matrix, using Lapack
 !
     TYPE(gemat), INTENT(inout) :: mat
     DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
     INTEGER :: n, m
     INTEGER :: info
     DOUBLE PRECISION :: dopla
     EXTERNAL dopla
 !
     m = mat%mrows
     n = mat%ncols
     CALL dgetrf(m, n, mat%val, m, mat%piv, info)
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GETRF ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     IF( PRESENT(flops) ) THEN
        flops = dopla('DGETRF',m, n, 0, 0, 0)
     END IF
   END SUBROUTINE factor_ge
 !===========================================================================
   SUBROUTINE factor_zgb(mat,flops)
 !
 !  Factor the matrix, using Lapack
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
     INTEGER :: lda, n, m, kl, ku
     INTEGER :: info
     DOUBLE PRECISION :: dopgb
     EXTERNAL dopgb
 !
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     n = mat%ncols
     CALL zgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info)
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRF ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     IF( PRESENT(flops) ) THEN
        flops = dopgb('ZGBTRF',m, n, kl, ku, mat%piv)
     END IF
   END SUBROUTINE factor_zgb
 !===========================================================================
   SUBROUTINE factor_zpb(mat,flops)
 !
 !  Factor the matrix, using Lapack
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
     INTEGER :: lda, n, ku
     INTEGER :: info
     DOUBLE PRECISION :: dopla
     EXTERNAL dopla
 !
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
     CALL zpbtrf('U', n, ku, mat%val, lda, info)
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from PBTRF ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     IF( PRESENT(flops) ) THEN
        flops = dopla('ZPBTRF', n, n, ku, ku, 1)
     END IF
   END SUBROUTINE factor_zpb
 !===========================================================================
   SUBROUTINE factor_zge(mat,flops)
 !
 !  Factor the matrix, using Lapack
 !
     TYPE(zgemat), INTENT(inout) :: mat
     DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
     INTEGER :: n, m
     INTEGER :: info
     DOUBLE PRECISION :: dopla
     EXTERNAL dopla
 !
     m = mat%mrows
     n = mat%ncols
     CALL zgetrf(m, n, mat%val, m, mat%piv, info)
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GETRF ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
     IF( PRESENT(flops) ) THEN
        flops = dopla('ZGETRF',m, n, 0, 0, 0)
     END IF
   END SUBROUTINE factor_zge
 !===========================================================================
   SUBROUTINE bsolve_gb1(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(gbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:) :: rhs
     DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, kl, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
 !
     IF( PRESENT(sol) ) THEN
        sol = rhs
        CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info)
     ELSE
        CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_gb1
 !===========================================================================
   SUBROUTINE bsolve_periodic1(mat, rhs, sol)
 !
 !   Backsolve, using the Sherman-Morrison-Woodburry formula
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:) :: rhs
     DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
     DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:)
     INTEGER :: rank, bandw, nrhs
     INTEGER :: info
 !----------------------------------------------------------------------
     rank = mat%mat%rank
     bandw = SIZE(mat%matvt,1)
     nrhs = 1
 !
 !  Solve Ay = f
     IF( PRESENT(sol) ) THEN
        CALL bsolve(mat%mat, rhs, sol)
     ELSE
        CALL bsolve(mat%mat, rhs)
     END IF
 !
     IF(bandw .EQ. 0 ) RETURN   !  No off band terms
 !
 !  t = V^T*y ( = W^T*y )
     ALLOCATE(tarr(bandw,nrhs))
     IF( PRESENT(sol) ) THEN
        CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
             &      rank, zero, tarr, bandw)
     ELSE
        CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
             &      rank, zero, tarr, bandw)
     END IF
 !
 !  y = y - U*t ( = y-Z*t)
    IF( PRESENT(sol) ) THEN
       CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, sol, rank)
    ELSE
       CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, rhs, rank)
    END IF
 !
     DEALLOCATE(tarr)
   END SUBROUTINE bsolve_periodic1
 !===========================================================================
   SUBROUTINE bsolve_zperiodic1(mat, rhs, sol)
 !
 !   Backsolve, using the Sherman-Morrison-Woodburry formula
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
     DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:)
     INTEGER :: rank, bandw, nrhs
     INTEGER :: info
 !----------------------------------------------------------------------
     rank = mat%mat%rank
     bandw = SIZE(mat%matvt,1)
     nrhs = 1
 !
 !  Solve Ay = f
     IF( PRESENT(sol) ) THEN
        CALL bsolve(mat%mat, rhs, sol)
     ELSE
        CALL bsolve(mat%mat, rhs)
     END IF
 !
     IF(bandw .EQ. 0 ) RETURN   !  No off band terms
 !
 !  t = V^T*y ( = W^T*y )
     ALLOCATE(tarr(bandw,nrhs))
     IF( PRESENT(sol) ) THEN
        CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
             &      rank, zero, tarr, bandw)
     ELSE
        CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
             &      rank, zero, tarr, bandw)
     END IF
 !
 !  y = y - U*t ( = y-Z*t)
    IF( PRESENT(sol) ) THEN
       CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, sol, rank)
    ELSE
       CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, rhs, rank)
    END IF
 !
     DEALLOCATE(tarr)
   END SUBROUTINE bsolve_zperiodic1
 !===========================================================================
   SUBROUTINE bsolve_pb1(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(pbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:) :: rhs
     DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
 !
     IF( PRESENT(sol) ) THEN
        sol = rhs
        CALL dpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info)
     ELSE
        CALL dpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from PBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_pb1
 !===========================================================================
   SUBROUTINE bsolve_ge1(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(gemat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:) :: rhs
     DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: n
     INTEGER :: info
 !----------------------------------------------------------------------
     n = mat%rank
 !
     IF( PRESENT(sol) ) THEN
        sol = rhs
        CALL dgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info)
     ELSE
        CALL dgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GETRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_ge1
 !===========================================================================
   SUBROUTINE bsolve_gbn(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(gbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:,:) :: rhs
     DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, nrhs, kl, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     nrhs = SIZE(rhs,2)
 !
     IF( PRESENT(sol) ) THEN
        sol(:,1:nrhs) = rhs(:,1:nrhs)
        CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info)
     ELSE
        CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_gbn
 !===========================================================================
   SUBROUTINE bsolve_periodicn(mat, rhs, sol)
 !
 !   Backsolve, using the Sherman-Morrison-Woodburry formula
 !
     TYPE(periodic_mat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:,:) :: rhs
     DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
     DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:)
     INTEGER :: rank, bandw, nrhs
     INTEGER :: info
 !----------------------------------------------------------------------
     rank = mat%mat%rank
     bandw = SIZE(mat%matvt,1)
     nrhs = SIZE(rhs,2)
 !
 !  Solve Ay = f
     IF( PRESENT(sol) ) THEN
        CALL bsolve(mat%mat, rhs, sol)
     ELSE
        CALL bsolve(mat%mat, rhs)
     END IF
 !
     IF(bandw .EQ. 0 ) RETURN   !  No off band terms
 !
 !  t = V^T*y ( = W^T*y )
     ALLOCATE(tarr(bandw,nrhs))
     IF( PRESENT(sol) ) THEN
        CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
             &      rank, zero, tarr, bandw)
     ELSE
        CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
             &      rank, zero, tarr, bandw)
     END IF
 !
 !  y = y - U*t ( = y-Z*t)
    IF( PRESENT(sol) ) THEN
       CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, sol, rank)
    ELSE
       CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, rhs, rank)
    END IF
 !
     DEALLOCATE(tarr)
   END SUBROUTINE bsolve_periodicn
 !===========================================================================
   SUBROUTINE bsolve_zperiodicn(mat, rhs, sol)
 !
 !   Backsolve, using the Sherman-Morrison-Woodburry formula
 !
     TYPE(zperiodic_mat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
     DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:)
     INTEGER :: rank, bandw, nrhs
     INTEGER :: info
 !----------------------------------------------------------------------
     rank = mat%mat%rank
     bandw = SIZE(mat%matvt,1)
     nrhs = SIZE(rhs,2)
 !
 !  Solve Ay = f
     IF( PRESENT(sol) ) THEN
        CALL bsolve(mat%mat, rhs, sol)
     ELSE
        CALL bsolve(mat%mat, rhs)
     END IF
 !
     IF(bandw .EQ. 0 ) RETURN   !  No off band terms
 !
 !  t = V^T*y ( = W^T*y )
     ALLOCATE(tarr(bandw,nrhs))
     IF( PRESENT(sol) ) THEN
        CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
             &      rank, zero, tarr, bandw)
     ELSE
        CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
             &      rank, zero, tarr, bandw)
     END IF
 !
 !  y = y - U*t ( = y-Z*t)
    IF( PRESENT(sol) ) THEN
       CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, sol, rank)
    ELSE
       CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
            &      bandw, one, rhs, rank)
    END IF
 !
     DEALLOCATE(tarr)
   END SUBROUTINE bsolve_zperiodicn
 !===========================================================================
   SUBROUTINE bsolve_pbn(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(pbmat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:,:) :: rhs
     DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, nrhs, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
     nrhs = SIZE(rhs,2)
 !
     IF( PRESENT(sol) ) THEN
        sol(:,1:nrhs) = rhs(:,1:nrhs)
        CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info)
     ELSE
        CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_pbn
 !===========================================================================
   SUBROUTINE bsolve_gen(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(gemat), INTENT(inout) :: mat
     DOUBLE PRECISION, DIMENSION (:,:) :: rhs
     DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: n, nrhs
     INTEGER :: info
 !----------------------------------------------------------------------
     n = mat%rank
     nrhs = SIZE(rhs,2)
 !
     IF( PRESENT(sol) ) THEN
        sol(:,1:nrhs) = rhs
        CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info)
     ELSE
        CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GETRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_gen
 !===========================================================================
   SUBROUTINE bsolve_zgb1(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, kl, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
 !
     IF( PRESENT(sol) ) THEN
        sol = rhs
        CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info)
     ELSE
        CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_zgb1
 !===========================================================================
   SUBROUTINE bsolve_zpb1(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
 !
     IF( PRESENT(sol) ) THEN
        sol = rhs
        CALL zpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info)
     ELSE
        CALL zpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from PBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_zpb1
 !===========================================================================
   SUBROUTINE bsolve_zge1(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(zgemat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: n
     INTEGER :: info
 !----------------------------------------------------------------------
     n = mat%rank
 !
     IF( PRESENT(sol) ) THEN
        sol = rhs
        CALL zgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info)
     ELSE
        CALL zgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GETRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_zge1
 !===========================================================================
   SUBROUTINE bsolve_zgbn(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(zgbmat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, nrhs, kl, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
     nrhs = SIZE(rhs,2)
 !
     IF( PRESENT(sol) ) THEN
        sol(:,1:nrhs) = rhs(:,1:nrhs)
        CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info)
     ELSE
        CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_zgbn
 !===========================================================================
   SUBROUTINE bsolve_zpbn(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(zpbmat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: lda, n, nrhs, ku
     INTEGER :: info
 !----------------------------------------------------------------------
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
     nrhs = SIZE(rhs,2)
 !
     IF( PRESENT(sol) ) THEN
        sol(:,1:nrhs) = rhs(:,1:nrhs)
        CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info)
     ELSE
        CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GBTRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_zpbn
 !===========================================================================
   SUBROUTINE bsolve_zgen(mat, rhs, sol)
 !
 !   Backsolve, using Lapack
 !
     TYPE(zgemat), INTENT(inout) :: mat
     DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
     DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
     INTEGER :: n, nrhs
     INTEGER :: info
 !----------------------------------------------------------------------
     n = mat%rank
     nrhs = SIZE(rhs,2)
 !
     IF( PRESENT(sol) ) THEN
        sol(:,1:nrhs) = rhs
        CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info)
     ELSE
        CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info)
     END IF
     IF( info .NE. 0) THEN
        WRITE(*,*) 'FACTOR: info from GETRS ', info
        STOP '*** Abnormal EXIT in MODULE matrix ***'
     END IF
   END SUBROUTINE bsolve_zgen
 !===========================================================================
   FUNCTION vmx_gb(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(gbmat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE PRECISION, ALLOCATABLE :: vmx(:)
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     n = mat%ncols
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        ALLOCATE(vmx(m))
     ELSE
        ALLOCATE(vmx(n))
     END IF
 !
     CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,&
          &      vmx, 1)
   END FUNCTION vmx_gb
 !===========================================================================
   FUNCTION vmx_ge(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(gemat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE PRECISION, ALLOCATABLE :: vmx(:)
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, m, n
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     m = mat%mrows
     n = mat%ncols
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        ALLOCATE(vmx(m))
     ELSE
        ALLOCATE(vmx(n))
     END IF
 !
     CALL dgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1)
   END FUNCTION vmx_ge
 !===========================================================================
   FUNCTION vmx_gen(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(gemat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:)
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, ldb, m, n, k
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     ldb = SIZE(x,1)
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        m = mat%mrows
        n = SIZE(x,2)
        k = mat%ncols
        ALLOCATE(vmx(m,n))
     ELSE
        m = mat%ncols
        n = SIZE(x,2)
        k = mat%mrows
        ALLOCATE(vmx(m,n))
     END IF
 !
     CALL dgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, &
          &     lda)
 !
   END FUNCTION vmx_gen
 !===========================================================================
   FUNCTION vmx_zge(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(zgemat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE COMPLEX, ALLOCATABLE :: vmx(:)
     DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, m, n
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     m = mat%mrows
     n = mat%ncols
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        ALLOCATE(vmx(m))
     ELSE
        ALLOCATE(vmx(n))
     END IF
 !
     CALL zgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1)
   END FUNCTION vmx_zge
 !===========================================================================
   FUNCTION vmx_zgen(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(zgemat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:)
     DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, ldb, m, n, k
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     ldb = SIZE(x,1)
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        m = mat%mrows
        n = SIZE(x,2)
        k = mat%ncols
        ALLOCATE(vmx(m,n))
     ELSE
        m = mat%ncols
        n = SIZE(x,2)
        k = mat%mrows
        ALLOCATE(vmx(m,n))
     END IF
 !
     CALL zgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, &
          &     lda)
 !
   END FUNCTION vmx_zgen
 !===========================================================================
   FUNCTION vmx_periodic(mat, x)
 !
 !   Return product mat*x
 !
     TYPE(periodic_mat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
     DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_periodic
     INTEGER :: kl, ku, n, i, ii
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     vmx_periodic = vmx(mat%mat, x)
 !
     DO i=1,kl
        vmx_periodic(i) =  vmx_periodic(i) + &
             &             DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n))
     END DO
 !
     DO i=n-ku+1,n
        ii = i-n+ku+kl
        vmx_periodic(i) =  vmx_periodic(i) + &
             &             DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku))
     END DO
   END FUNCTION vmx_periodic
 !===========================================================================
   FUNCTION vmx_zperiodic(mat, x)
 !
 !   Return product mat*x
 !
     TYPE(zperiodic_mat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
     DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zperiodic
     INTEGER :: kl, ku, n, i, ii
 !
     kl = mat%mat%kl
     ku = mat%mat%ku
     n  = mat%mat%rank
 !
     vmx_zperiodic = vmx(mat%mat, x)
 !
     DO i=1,kl
        vmx_zperiodic(i) =  vmx_zperiodic(i) + &
             &             DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n))
     END DO
 !
     DO i=n-ku+1,n
        ii = i-n+ku+kl
        vmx_zperiodic(i) =  vmx_zperiodic(i) + &
             &             DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku))
     END DO
   END FUNCTION vmx_zperiodic
 !===========================================================================
   FUNCTION vmx_gbn(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(gbmat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:)
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     n = mat%ncols
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        ALLOCATE(vmx(m,SIZE(x,2)))
     ELSE
        ALLOCATE(vmx(n,SIZE(x,2)))
     END IF
 !
     DO k=1,SIZE(x,2)
        CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, &
                &      x(1,k), 1, zero, vmx(1,k), 1)
     END DO
   END FUNCTION vmx_gbn
 !===========================================================================
   FUNCTION vmx_pb(mat, x)
 !
 !   Return product mat*x
 !
     TYPE(pbmat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
     DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_pb
     INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax
 !
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
 !
     vmx_pb = 0.0d0
     DO j=1,n
        imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
        ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
        vmx_pb(imin:imax) =  vmx_pb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j)
 !
        imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
        DO i=imin,imax
           ib = ku+1+j-i
           vmx_pb(i) = vmx_pb(i) + mat%val(ib,i)*x(j)
        END DO
     END DO
   END FUNCTION vmx_pb
 !===========================================================================
   FUNCTION vmx_pbn(mat, x)
 !
 !   Return product mat*x
 !
     TYPE(pbmat), INTENT(in) :: mat
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x
     DOUBLE PRECISION, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_pbn
     INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax
 !
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
 !
     vmx_pbn = 0.0d0
     DO j=1,n
        imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
        ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
        DO k=1,SIZE(x,2)
           vmx_pbn(imin:imax,k) =  vmx_pbn(imin:imax,k) + &
                &                  mat%val(ibmin:ibmax,j)*x(j,k)
        END DO
 !
        imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
        DO i=imin,imax
           ib = ku+1+j-i
           vmx_pbn(i,:) = vmx_pbn(i,:) + mat%val(ib,i)*x(j,:)
        END DO
     END DO
   END FUNCTION vmx_pbn
 !===========================================================================
   FUNCTION vmx_zgb(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(zgbmat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE COMPLEX, ALLOCATABLE :: vmx(:)
     DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     n = mat%ncols
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        ALLOCATE(vmx(m))
     ELSE
        ALLOCATE(vmx(n))
     END IF
 !
     CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,&
          &      vmx, 1)
   END FUNCTION vmx_zgb
 !===========================================================================
   FUNCTION vmx_zgbn(mat, x, trans) RESULT(vmx)
 !
 !   Return product mat*x
 !
     TYPE(zgbmat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x
     CHARACTER(len=1), OPTIONAL :: trans
     DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:)
     DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
     INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax
     CHARACTER(len=1) :: trans_loc
 !
     lda = SIZE(mat%val, 1)
     kl = mat%kl
     ku = mat%ku
     m = mat%mrows
     n = mat%ncols
     trans_loc = 'N'
     IF(PRESENT(trans)) trans_loc = trans
 !
     IF(trans_loc.EQ.'N') THEN
        ALLOCATE(vmx(m,SIZE(x,2)))
     ELSE
        ALLOCATE(vmx(n,SIZE(x,2)))
     END IF
 !
     DO k=1,SIZE(x,2)
        CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, &
                &      x(1,k), 1, zero, vmx(1,k), 1)
     END DO
   END FUNCTION vmx_zgbn
 !===========================================================================
   FUNCTION vmx_zpb(mat, x)
 !
 !   Return product mat*x
 !
     TYPE(zpbmat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
     DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zpb
     INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax
 !
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
 !
     vmx_zpb = 0.0d0
     DO j=1,n
        imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
        ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
        vmx_zpb(imin:imax) =  vmx_zpb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j)
 !
        imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
        DO i=imin,imax
           ib = ku+1+j-i
           vmx_zpb(i) = vmx_zpb(i) + CONJG(mat%val(ib,i))*x(j)
        END DO
     END DO
   END FUNCTION vmx_zpb
 !===========================================================================
   FUNCTION vmx_zpbn(mat, x)
 !
 !   Return product mat*x
 !
     TYPE(zpbmat), INTENT(in) :: mat
     DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x
     DOUBLE COMPLEX, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_zpbn
     INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax
 !
     lda = SIZE(mat%val, 1)
     ku = mat%ku
     n = mat%rank
 !
     vmx_zpbn = 0.0d0
     DO j=1,n
        imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
        ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
        DO k=1,SIZE(x,2)
           vmx_zpbn(imin:imax,k) = vmx_zpbn(imin:imax,k) + &
                &                  mat%val(ibmin:ibmax,j)*x(j,k)
        END DO
 !
        imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
        DO i=imin,imax
           ib = ku+1+j-i
           vmx_zpbn(i,:) = vmx_zpbn(i,:) + CONJG(mat%val(ib,i))*x(j,:)
        END DO
     END DO
   END FUNCTION vmx_zpbn
 !===========================================================================
   SUBROUTINE determinant_ge(mat, base, pow)
 !
 !   Return the determinant of mat
 !
     TYPE(gemat) :: mat
     INTEGER :: pow, i
     DOUBLE PRECISION :: base
 !
     CALL factor(mat)
     base = 1.0d0
     pow = 0
     DO i=1,mat%rank
        IF( mat%piv(i) .NE. i) base = -base
        base = mat%val(i,i)*base
        IF( base .EQ. 0.0d0 ) THEN
           WRITE(*,*) 'DETERMINANT_GE: matrix is singular'
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        DO
           IF( ABS(base) .GE. 1.0d0 ) EXIT
           base = 10.0d0*base
           pow = pow - 1
        END DO
        DO
           IF( ABS(base) .LT. 10.0d0 ) EXIT
           base = base/10.0d0
           pow = pow + 1
        END DO
     END DO
   END SUBROUTINE determinant_ge
 !===========================================================================
   SUBROUTINE determinant_gb(mat, base, pow)
 !
 !   Return the determinant of mat
 !
     TYPE(gbmat) :: mat
     INTEGER :: pow, i, ib
     DOUBLE PRECISION :: base
 !
     CALL factor(mat)
     base = 1.0d0
     pow = 0
     ib=mat%kl + mat%ku + 1
     DO i=1,mat%rank
        IF( mat%piv(i) .NE. i) base = -base
        base = mat%val(ib,i)*base
        IF( base .EQ. 0.0d0 ) THEN
           WRITE(*,*) 'DETERMINANT_GB: matrix is singular'
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        DO
           IF( ABS(base) .GE. 1.0d0 ) EXIT
           base = 10.0d0*base
           pow = pow - 1
        END DO
        DO
           IF( ABS(base) .LT. 10.0d0 ) EXIT
           base = base/10.0d0
           pow = pow + 1
        END DO
     END DO
   END SUBROUTINE determinant_gb
 !===========================================================================
   SUBROUTINE determinant_pb(mat, base, pow)
 !
 !   Return the determinant of mat
 !
     TYPE(pbmat) :: mat
     INTEGER :: pow, i, ib
     DOUBLE PRECISION :: base
 !
     CALL factor(mat)
     base = 1.0d0
     pow = 0
     ib = mat%ku + 1
     DO i=1,mat%rank
        base = mat%val(ib,i)*base
        IF( base .EQ. 0.0d0 ) THEN
           WRITE(*,*) 'DETERMINANT_PB: matrix is singular'
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        DO
           IF( ABS(base) .GE. 1.0d0 ) EXIT
           base = 10.0d0*base
           pow = pow - 1
        END DO
        DO
           IF( ABS(base) .LT. 10.0d0 ) EXIT
           base = base/10.0d0
           pow = pow + 1
        END DO
     END DO
     base=base**2
     pow=pow*2
   END SUBROUTINE determinant_pb
 !===========================================================================
   SUBROUTINE determinant_zge(mat, base, pow)
 !
 !   Return the determinant of mat
 !
     TYPE(zgemat) :: mat
     INTEGER :: pow, i
     DOUBLE COMPLEX :: base
 !
     CALL factor(mat)
     base = 1.0d0
     pow = 0
     DO i=1,mat%rank
        IF( mat%piv(i) .NE. i) base = -base
        base = mat%val(i,i)*base
        IF( base .EQ. 0.0d0 ) THEN
           WRITE(*,*) 'DETERMINANT_ZGE: matrix is singular'
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        DO
           IF( ABS(base) .GE. 1.0d0 ) EXIT
           base = 10.0d0*base
           pow = pow - 1
        END DO
        DO
           IF( ABS(base) .LT. 10.0d0 ) EXIT
           base = base/10.0d0
           pow = pow + 1
        END DO
     END DO
   END SUBROUTINE determinant_zge
 !===========================================================================
   SUBROUTINE determinant_zgb(mat, base, pow)
 !
 !   Return the determinant of mat
 !
     TYPE(zgbmat) :: mat
     INTEGER :: pow, i, ib
     DOUBLE COMPLEX :: base
 !
     CALL factor(mat)
     base = 1.0d0
     pow = 0
     ib=mat%kl + mat%ku + 1
     DO i=1,mat%rank
        IF( mat%piv(i) .NE. i) base = -base
        base = mat%val(ib,i)*base
        IF( base .EQ. 0.0d0 ) THEN
           WRITE(*,*) 'DETERMINANT_GB: matrix is singular'
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        DO
           IF( ABS(base) .GE. 1.0d0 ) EXIT
           base = 10.0d0*base
           pow = pow - 1
        END DO
        DO
           IF( ABS(base) .LT. 10.0d0 ) EXIT
           base = base/10.0d0
           pow = pow + 1
        END DO
     END DO
   END SUBROUTINE determinant_zgb
 !===========================================================================
   SUBROUTINE determinant_zpb(mat, base, pow)
 !
 !   Return the determinant of mat
 !
     TYPE(zpbmat) :: mat
     INTEGER :: pow, i, ib
     DOUBLE COMPLEX :: base
 !
     CALL factor(mat)
     base = 1.0d0
     pow = 0
     ib = mat%ku + 1
     DO i=1,mat%rank
        base = mat%val(ib,i)*base
        IF( base .EQ. 0.0d0 ) THEN
           WRITE(*,*) 'DETERMINANT_PB: matrix is singular'
           STOP '*** Abnormal EXIT in MODULE matrix ***'
        END IF
        DO
           IF( ABS(base) .GE. 1.0d0 ) EXIT
           base = 10.0d0*base
           pow = pow - 1
        END DO
        DO
           IF( ABS(base) .LT. 10.0d0 ) EXIT
           base = base/10.0d0
           pow = pow + 1
        END DO
     END DO
     base=base**2
     pow=pow*2
   END SUBROUTINE determinant_zpb
 !===========================================================================
   SUBROUTINE putmat_gb(fid, label, mat, str)
 !
 !   Write GB matrix in hdf5 file
 !
     USE futils
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(gbmat)                            :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
     IF(PRESENT(str)) THEN
        CALL putarr(fid, label, mat%val, str)
     ELSE
        CALL putarr(fid, label, mat%val)
     END IF
     CALL attach(fid, label, 'KL', mat%kl)
     CALL attach(fid, label, 'KU', mat%ku)
     CALL attach(fid, label, 'RANK', mat%rank)
   END SUBROUTINE putmat_gb
 !===========================================================================
   SUBROUTINE getmat_gb(fid, label, mat, str)
 !
 !   Read in GB matrix from hdf5 file
 !
     USE futils
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(gbmat)                            :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
     CALL getatt(fid, label, 'KL', mat%kl)
     CALL getatt(fid, label, 'KU', mat%ku)
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getarr(fid, label, mat%val)
   END SUBROUTINE getmat_gb
 !===========================================================================
   SUBROUTINE kron_ge(mata, matb, matc)
 !
 !  Krocnecker product of 2 dense matrices
 !
     TYPE(gemat), INTENT(in)  :: mata, matb
     TYPE(gemat), INTENT(out) :: matc
 !
     INTEGER :: i1, j1, i3, j3, m1, n1, m2, n2, m3, n3
 !
     m1 = mata%mrows
     n1 = mata%ncols
     m2 = matb%mrows
     n2 = matb%ncols
     m3 = m1*m2
     n3 = n1*n2
 !
     CALL init(n3, 0, matc, mrows=m3)
     DO i1=1,m1
        i3 = (i1-1)*m2
        DO j1=1,n1
           j3 = (j1-1)*n2
           matc%val(i3+1:i3+m2,j3+1:j3+n2) = mata%val(i1,j1)*matb%val(1:m2,1:n2)
        END DO
     END DO
   END SUBROUTINE kron_ge
 !===========================================================================
 END MODULE matrix
diff --git a/src/multigrid_mod.f90 b/src/multigrid_mod.f90
index 192c76c..58173b8 100644
--- a/src/multigrid_mod.f90
+++ b/src/multigrid_mod.f90
@@ -1,2373 +1,2373 @@
 !>
 !> @file multigrid_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE multigrid
 !
 !    MULTIGRID: Implement Multigrid solver for Finite Elements
 !               and Fiinite Differences.
 !
 !    T.M. Tran, CRPP-EPFL
 !    September 2012
 !
   USE bsplines
   USE matrix
   USE conmat_mod
   USE csr
   USE cds
   IMPLICIT NONE
 !
   TYPE grid1d
      INTEGER                         :: n    ! Number of intervals
      INTEGER                         :: rank ! Dimension of FE space
      DOUBLE PRECISION                :: h
      DOUBLE PRECISION, ALLOCATABLE   :: x(:)
      DOUBLE PRECISION, ALLOCATABLE   :: v(:)
      DOUBLE PRECISION, ALLOCATABLE   :: f(:)
      TYPE(spline1d)                  :: spl
      TYPE(gemat)                     :: transf    ! Coarse to fine transfer matrix
      TYPE(gbmat), ALLOCATABLE        :: mata      ! FE matrix
      TYPE(gbmat), ALLOCATABLE        :: matm      ! mass matrix
      TYPE(gbmat), ALLOCATABLE        :: mata_copy ! Used for direct_solve
      TYPE(gemat), ALLOCATABLE        :: matap     ! FE matrix
      TYPE(gemat), ALLOCATABLE        :: matmp     ! mass matrix
      TYPE(gemat), ALLOCATABLE        :: matap_copy! Used for direct_solve
   END TYPE grid1d
 !
   TYPE grid2d
      INTEGER                         :: n(2)    ! Number of intervals
      INTEGER                         :: rank(2) ! Dimension of FE space
      DOUBLE PRECISION                :: h(2)
      DOUBLE PRECISION, ALLOCATABLE   :: x(:), y(:)
 !
      DOUBLE PRECISION, ALLOCATABLE :: v(:,:) ! sol
      DOUBLE PRECISION, ALLOCATABLE :: f(:,:) ! rhs
      DOUBLE PRECISION, POINTER     :: v1d(:) ! flatten sol
      DOUBLE PRECISION, POINTER     :: f1d(:) ! flatten rhs
 !
      TYPE(csr_mat),ALLOCATABLE  :: mata
      TYPE(cds_mat),ALLOCATABLE  :: mata_cds
      TYPE(spline2d) :: spl
      TYPE(gemat)    :: transf(2)
 !
      TYPE(csr_mat)  :: matp(2)
   END TYPE grid2d
 !
   TYPE mg_info
      INTEGER :: nu1    ! Relaxation down sweeps
      INTEGER :: nu2    ! Relaxation up sweeps
      INTEGER :: mu     ! mu-cycle number
      INTEGER :: nu0    ! Number of FMG cycles
      INTEGER :: levels ! Number of mg levels
      CHARACTER(len=4) :: relax ! Type of relation
      DOUBLE PRECISION :: omega ! for weighted Jacobi relaxation
      LOGICAL          :: nlscale=.FALSE. ! Scale restriction if .TRUE.
   END TYPE mg_info
 !
   INTERFACE create_grid
      MODULE PROCEDURE create_grid_1d, create_grid_2d
   END INTERFACE create_grid
   INTERFACE disrhs
      MODULE PROCEDURE disrhs_1d, disrhs_2d
   END INTERFACE disrhs
   INTERFACE direct_solve
      MODULE PROCEDURE direct_solve_1d, direct_solve_2d
   END INTERFACE direct_solve
   INTERFACE mg
      MODULE PROCEDURE mg_1d, mg_2d
   END INTERFACE mg
   INTERFACE disc_err
      MODULE PROCEDURE disc_err_1d, disc_err_2d
   END INTERFACE disc_err
   INTERFACE jacobi
      MODULE PROCEDURE jacobi_cds, jacobi_csr, jacobi_gb, jacobi_ge
   END INTERFACE jacobi
   INTERFACE gs
      MODULE PROCEDURE gs_cds, gs_csr, gs_gb, gs_ge
   END INTERFACE gs
   INTERFACE restrict
      MODULE PROCEDURE restrict_1d, restrict_2d, restrict_2d_csr
   END INTERFACE restrict
   INTERFACE prolong
      MODULE PROCEDURE prolong_1d, prolong_2d, prolong_2d_csr
   END INTERFACE prolong
   INTERFACE printmat
      MODULE PROCEDURE printmat_mat, printmat_ge, printmat_gb, printmat_periodic
   END INTERFACE printmat
   INTERFACE massmat
      MODULE PROCEDURE massmat_ge, massmat_gb, massmat_periodic
   END INTERFACE massmat
   INTERFACE femat
      MODULE PROCEDURE femat_2d_csr, femat_ge, femat_gb, femat_periodic
   END INTERFACE femat
   INTERFACE ibcmat
      MODULE PROCEDURE ibcmat_1d, ibcmat_2d
   END INTERFACE ibcmat
   INTERFACE mod_transf
      MODULE PROCEDURE mod_transf_full, mod_transf_csr
   END INTERFACE mod_transf
   INTERFACE normf
      MODULE PROCEDURE normf_gb, normf_ge
   END INTERFACE normf
   INTERFACE residue
      MODULE PROCEDURE residue_gen, residue_csr, residue_cds, residue_gb, residue_ge
   END INTERFACE residue
 !
 CONTAINS
 !--------------------------------------------------------------------------------
   SUBROUTINE create_grid_1d(n, nidbas, ng_in, alpha, grids, period)
 !
 !    Create an array of levels  grids
 !    Compute mass matrix and prolongation matrices.
 !
     INTEGER, INTENT(in) :: n      ! Number of intervals in the finest grid
     INTEGER, INTENT(in) :: nidbas ! Order of splines
     INTEGER, INTENT(in) :: ng_in  ! Number of proposed Gauss points
     INTEGER, INTENT(in) :: alpha  ! geometric exponent
     TYPE(grid1d), INTENT(out)     :: grids(:)
     LOGICAL, INTENT(in), OPTIONAL :: period
 !
     LOGICAL :: nlper
     INTEGER :: n_current, nrank, ngauss
     INTEGER :: levels, l, i
     DOUBLE PRECISION :: h_current
     TYPE(gbmat) :: matm
     TYPE(gemat) :: matmp
 !
     levels = SIZE(grids)
     nlper = .FALSE.
     IF(PRESENT(period)) nlper = period
 !    
     ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0)
     ngauss = MAX(ng_in, ngauss)
     WRITE(*,'(a,i0)') 'ngauss = ', ngauss
 !
 !    Allocate some matrices
 !
     DO l=1,levels
        IF(nlper) THEN
           ALLOCATE(grids(l)%matmp)
           ALLOCATE(grids(l)%matap)
        ELSE
           ALLOCATE(grids(l)%matm)
           ALLOCATE(grids(l)%mata)
        END IF
     END DO
 !
     n_current = n
     h_current = 1.0d0/REAL(n_current,8)
     DO l=1,levels
        IF(n_current .LT. 2 ) THEN
           PRINT*, 'CREATE_GRID: number intervals too small!'
           STOP
        END IF
        grids(l)%n = n_current
        grids(l)%h = h_current
        ALLOCATE(grids(l)%x(0:n_current))
        grids(l)%x(0:n_current) = (/ (REAL(i,8)*h_current, i=0,n_current) /)
        CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%spl, period=nlper)
        CALL get_dim(grids(l)%spl, nrank)
        IF(nlper) nrank = n_current
        grids(l)%rank = nrank
        ALLOCATE(grids(l)%v(nrank))
        ALLOCATE(grids(l)%f(nrank))
        IF(nlper) THEN
           CALL massmat(grids(l)%spl, alpha, grids(l)%matmp)
        ELSE
           CALL massmat(grids(l)%spl, alpha, grids(l)%matm)
        END IF
        IF(l.GT.1) THEN 
           CALL ctof_massmat(grids(l-1)%spl, grids(l)%spl, alpha, grids(l)%transf)
           IF(nlper) THEN
              CALL mcopy(grids(l-1)%matmp, matmp)
              CALL factor(matmp)
              CALL bsolve(matmp, grids(l)%transf%val)
              CALL destroy(matmp)
           ELSE
              CALL mcopy(grids(l-1)%matm, matm)
              CALL factor(matm)
              CALL bsolve(matm, grids(l)%transf%val)
              CALL destroy(matm)
           END IF
       END IF
        n_current = n_current/2
        h_current = 2.0d0*h_current
     END DO
   END SUBROUTINE create_grid_1d
 !--------------------------------------------------------------------------------
   SUBROUTINE create_grid_2d(x, y, nidbas, ng_in, alpha, grids, mat_type, period, &
        &                   debug_in)
 !
 !    Create an array of levels  grids
 !    Compute mass matrix and prolongation matrices.
 !
     DOUBLE PRECISION, INTENT(in) :: x(0:), y(0:) ! Finest grid points
     INTEGER, INTENT(in)          :: nidbas(2)    ! Order of splines
     INTEGER, INTENT(in)          :: alpha(2)     ! geometric exponent
     INTEGER, INTENT(in)          :: ng_in(2)     ! Number of proposed Gauss points
     TYPE(grid2d), INTENT(out), TARGET :: grids(:)
     CHARACTER(*), INTENT(in), OPTIONAL :: mat_type ! csr (default) or cds
     LOGICAL, INTENT(in), OPTIONAL     :: period(2)
     LOGICAL, INTENT(in), OPTIONAL     :: debug_in
 !
     LOGICAL, DIMENSION(2) :: nlper
     INTEGER, DIMENSION(2) :: n, sp_dim, ngauss
     LOGICAL     :: nlcds
     LOGICAL     :: debug
     INTEGER     :: levels, l, rank2d
     TYPE(gemat) :: matm
 !
     DOUBLE PRECISION, PARAMETER :: pi = 4.0d0*ATAN(1.0d0)
 !
 !  Process input args
 !
     n(1) = SIZE(x)-1
     n(2) = SIZE(y)-1
     levels = SIZE(grids)
     nlper = .FALSE.
     IF(PRESENT(period)) THEN 
        nlper = period
     END IF
     IF(PRESENT(debug_in)) THEN
        debug = debug_in
     ELSE
        debug = .FALSE.
     END IF
     IF(PRESENT(mat_type)) THEN  ! CSR matrix by default
        nlcds = mat_type.EQ.'cds'
     ELSE
        nlcds = .FALSE.
     END IF
 !
 !  WARNING: Assume that only 2nd dim can be periodic!!!
     IF(nlper(1)) THEN
        WRITE(*,'(A)') 'CREATE_GRID: First dimension could not be periodic!'
        STOP
     END IF
 !
     ngauss = CEILING(REAL(2*nidbas+1,8)/2.d0)
     ngauss = MAX(ng_in, ngauss)
     WRITE(*,'(a,2i4)') 'ngauss = ', ngauss
 !
     DO l=1,levels
 !
 !  Create mesh from finest grid mesh
        IF(MINVAL(n) .LT. 2 ) THEN
           PRINT*, 'CREATE_GRID: number intervals too small!'
           STOP
        END IF
        grids(l)%n = n
        ALLOCATE(grids(l)%x(0:n(1)))
        ALLOCATE(grids(l)%y(0:n(2)))
        IF(l.EQ.1) THEN
           grids(1)%x = x
           grids(1)%y = y
        ELSE
           grids(l)%x(:) = grids(l-1)%x(0::2)
           grids(l)%y(:) = grids(l-1)%y(0::2)
        END IF
        IF(debug) THEN
           WRITE(*,'(/a,i4,a,2l2)') 'l =', l, '  nlper =', nlper
           WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x
           WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y
        END IF
 !
 !  Allocate mem for solution v and RHS f
        CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper)
        CALL get_dim(grids(l)%spl%sp1, sp_dim(1))
        CALL get_dim(grids(l)%spl%sp2, sp_dim(2))
        ALLOCATE(grids(l)%v(sp_dim(1), sp_dim(2)))
        ALLOCATE(grids(l)%f(sp_dim(1), sp_dim(2)))
 !
 !  WARNING: Assume that only 2nd dim can be periodic!!!
        grids(l)%rank = sp_dim
        IF(nlper(2)) THEN
           grids(l)%rank(2) = n(2)
        END IF
        IF(debug) THEN
           WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank
        END IF
 !
 !  Flatten version of sol and rhs
        rank2d = PRODUCT(grids(l)%rank)
        grids(l)%v1d(1:rank2d) => grids(l)%v
        grids(l)%f1d(1:rank2d) => grids(l)%f
 !
 !  Matrix format for FE matrix
        IF(nlcds) THEN
           ALLOCATE(grids(l)%mata_cds)
        ELSE
           ALLOCATE(grids(l)%mata)
        END IF
 !
 !  Coarse to fine mesh transfers for l>1
        IF(l.GT.1) THEN
           CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1))
           CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2))
 !
           CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm)
           CALL factor(matm)
           CALL bsolve(matm, grids(l)%transf(1)%val)
           CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1))
           CALL destroy(matm)
           CALL destroy(grids(l)%transf(1))
 !
           CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm)
           CALL factor(matm)
           CALL bsolve(matm, grids(l)%transf(2)%val)
           CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2))
           CALL destroy(matm)
           CALL destroy(grids(l)%transf(2))
        END IF
 !
 !   Next coarse grid
        n = n/2
     END DO
   END SUBROUTINE create_grid_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE create_grid_fd(x, y, grids, info, mat_type, period, debug)
 !
 !    FD version of create_grid
 !
     DOUBLE PRECISION, INTENT(in)       :: x(0:), y(0:)
     TYPE(grid2d), INTENT(out), TARGET  :: grids(:)
     TYPE(mg_info), INTENT(inout)       :: info      ! info for MG
     CHARACTER(*), INTENT(in), OPTIONAL :: mat_type  ! csr (default) or cds
     LOGICAL, INTENT(in), OPTIONAL      :: period(2)
     LOGICAL, INTENT(in), OPTIONAL      :: debug
 !
     INTEGER :: nidbas(2)=1, ngauss(2)=4    ! Linear Splines \equiv 1st FD
     INTEGER :: alpha(2) = 1                ! Cartesian coordinate
     LOGICAL, DIMENSION(2) :: nlper
     LOGICAL :: nldebug
     LOGICAL :: nlcds
     INTEGER :: levels, n(2), sp_dim(2)
     INTEGER :: l, rank2d    
     TYPE(gemat)  :: matm
 !--------------------------------------------------------------------------------    
 !
 !  Process input args
 !
     n(1) = SIZE(x)-1
     n(2) = SIZE(y)-1
     levels = SIZE(grids)
     info%nlscale = .TRUE.   ! Restriction should be scaled for FD
     nlper = .FALSE.
     IF(PRESENT(period)) nlper = period
     nldebug = .FALSE.
     IF(PRESENT(debug))  nldebug = debug
     IF(PRESENT(mat_type)) THEN  ! CSR matrix by default
        nlcds = mat_type.EQ.'cds'
     ELSE
        nlcds = .FALSE.
     END IF
 !
     DO l=1,levels
 !
 !  Create mesh from finest grid mesh
        IF(MINVAL(n) .LT. 2 ) THEN
           PRINT*, 'CREATE_GRID: number intervals too small!'
           STOP
        END IF
        grids(l)%n = n
        ALLOCATE(grids(l)%x(0:n(1)))
        ALLOCATE(grids(l)%y(0:n(2)))
        IF(l.EQ.1) THEN
           grids(1)%x = x
           grids(1)%y = y
        ELSE
           grids(l)%x(:) = grids(l-1)%x(0::2)
           grids(l)%y(:) = grids(l-1)%y(0::2)
        END IF
        IF(nldebug) THEN
           WRITE(*,'(/a,i4,a,2l2)') 'l =', l, '  nlper =', nlper
           WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x
           WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y
        END IF
 !
 !  Allocate mem for solution v and RHS f
        CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper)
        CALL get_dim(grids(l)%spl%sp1, sp_dim(1))
        CALL get_dim(grids(l)%spl%sp2, sp_dim(2))
        ALLOCATE(grids(l)%v(0:sp_dim(1)-1, 0:sp_dim(2)-1))
        ALLOCATE(grids(l)%f(0:sp_dim(1)-1, 0:sp_dim(2)-1))
 !
 !  WARNING: Assume that only 2nd dim can be periodic!!!
        grids(l)%rank = sp_dim
        IF(nlper(2)) THEN
           grids(l)%rank(2) = n(2)
        END IF
        IF(nldebug) THEN
           WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank
        END IF
 !
 !  Flatten version of sol and rhs
        rank2d = PRODUCT(grids(l)%rank)
        grids(l)%v1d(1:rank2d) => grids(l)%v
        grids(l)%f1d(1:rank2d) => grids(l)%f
 !
 !  Matrix format for FD matrix
        IF(nlcds) THEN
           ALLOCATE(grids(l)%mata_cds)
        ELSE
           ALLOCATE(grids(l)%mata)
        END IF
 !
 !  Coarse to fine mesh transfers for l>1
        IF(l.GT.1) THEN
           CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1))
           CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2))
 !
           CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm)
           CALL factor(matm)
           CALL bsolve(matm, grids(l)%transf(1)%val)
           CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1))
           CALL destroy(matm)
           CALL destroy(grids(l)%transf(1))
 !
           CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm)
           CALL factor(matm)
           CALL bsolve(matm, grids(l)%transf(2)%val)
           CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2))
           CALL destroy(matm)
           CALL destroy(grids(l)%transf(2))
        END IF
 !
 !   Next coarse grid
        n = n/2
     END DO
   END SUBROUTINE create_grid_fd
 !--------------------------------------------------------------------------------
   RECURSIVE SUBROUTINE fmg(grids, info, l)
 !
 !   Execute a full multigrid V-cycle
 !
     TYPE(grid1d), INTENT(inout) :: grids(:)
     TYPE(mg_info), INTENT(in)    :: info
     INTEGER, INTENT(in)          :: l
     INTEGER :: levels, k
     levels = info%levels
 !
     IF(l.EQ.levels) THEN
        CALL direct_solve(grids(levels), grids(levels)%v)
     ELSE
        grids(l+1)%f = restrict(grids(l+1)%transf,grids(l)%f)
        CALL fmg(grids, info, l+1)       
        grids(l)%v = prolong(grids(l+1)%transf,grids(l+1)%v)
        DO k=1,info%nu0
           CALL mg(grids, info, l)
        END DO
     END IF
   END SUBROUTINE fmg
 !--------------------------------------------------------------------------------
   RECURSIVE SUBROUTINE mg_1d(grids, info, l)
 !
 !   Execute a recursive V-cycle
 !
     TYPE(grid1d), INTENT(inout)  :: grids(:)
     TYPE(mg_info), INTENT(in)    :: info
     INTEGER, INTENT(in)          :: l
     INTEGER :: levels, k
     LOGICAL :: nlper
 !
     levels = info%levels
     nlper = grids(1)%spl%period
 !
     IF(l.EQ.levels) THEN
        CALL direct_solve(grids(levels), grids(levels)%v)
     ELSE
        CALL relax(info%nu1)
        IF(nlper) THEN
           grids(l+1)%f = restrict(grids(l+1)%transf, &
                &   grids(l)%f-vmx(grids(l)%matap, grids(l)%v))
        ELSE
           grids(l+1)%f = restrict(grids(l+1)%transf, &
                &   grids(l)%f-vmx(grids(l)%mata, grids(l)%v))
        END IF
        grids(l+1)%v = 0.0d0
 !
 !   Only 1 call to the coarsest level
        DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
           CALL mg(grids, info, l+1)
        END DO
 !
        grids(l)%v = grids(l)%v + prolong(grids(l+1)%transf,grids(l+1)%v)
        CALL relax(info%nu2)
     END IF
 !
   CONTAINS
     SUBROUTINE relax(nu)
       INTEGER, INTENT(in) :: nu
       SELECT CASE (TRIM(info%relax))
       CASE ("jac")
          IF(nlper) THEN
             CALL jacobi(grids(l)%matap, info%omega, nu, grids(l)%v, grids(l)%f)
          ELSE
             CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v, grids(l)%f)
          END IF
       CASE ("gs")
          IF(nlper) THEN
             CALL gs(grids(l)%matap, nu, grids(l)%v, grids(l)%f)
          ELSE
             CALL gs(grids(l)%mata, nu, grids(l)%v, grids(l)%f)
          END IF
       CASE default
          PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
          STOP
       END SELECT
     END SUBROUTINE relax
   END SUBROUTINE mg_1d
 !--------------------------------------------------------------------------------
   RECURSIVE SUBROUTINE mg_2d(grids, info, l)
 !
 !   Execute a recursive V-cycle
 !
     TYPE(grid2d), INTENT(inout)  :: grids(:)
     TYPE(mg_info), INTENT(in)    :: info
     INTEGER, INTENT(in)          :: l
 !
     DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:)
     DOUBLE PRECISION, POINTER             :: resid1d(:)
     INTEGER :: levels, k, m1, m2
 !
     levels = info%levels
     m1 = SIZE(grids(l)%v,1)
     m2 = SIZE(grids(l)%v,2)
 !
     IF(l.EQ.levels) THEN
        grids(levels)%v = grids(levels)%f
        CALL direct_solve(grids(levels), grids(levels)%v1d)
     ELSE
        CALL relax(info%nu1)
        ALLOCATE(resid(m1,m2)); resid1d(1:m1*m2) => resid
        IF(ALLOCATED(grids(l)%mata)) THEN
           resid1d = grids(l)%f1d - vmx(grids(l)%mata, grids(l)%v1d)
        ELSE
           resid1d = grids(l)%f1d - vmx(grids(l)%mata_cds, grids(l)%v1d)
        END IF
        grids(l+1)%f = restrict(grids(l+1)%matp, resid)
        IF(info%nlscale) grids(l+1)%f = 0.25d0*grids(l+1)%f
        DEALLOCATE(resid)
        grids(l+1)%v = 0.0d0
 !
 !   Only 1 call to the coarsest level
        DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
           CALL mg(grids, info, l+1)
        END DO
 !
        grids(l)%v = grids(l)%v + prolong(grids(l+1)%matp,grids(l+1)%v)
        CALL relax(info%nu2)
     END IF
 !
   CONTAINS
     SUBROUTINE relax(nu)
       INTEGER, INTENT(in) :: nu
       SELECT CASE (TRIM(info%relax))
       CASE ("jac")
          IF(ALLOCATED(grids(l)%mata)) THEN
             CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
          ELSE
             CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
          END IF
       CASE ("gs")
          IF(ALLOCATED(grids(l)%mata)) THEN
             CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d)
          ELSE
             CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d)
          END IF
       CASE default
          PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
          STOP
       END SELECT
     END SUBROUTINE relax
   END SUBROUTINE mg_2d
 !--------------------------------------------------------------------------------
   RECURSIVE SUBROUTINE mg_cyl(grids, info, l, nluniq_in)
 !
 !   Execute a recursive V-cycle
 !
     TYPE(grid2d), INTENT(inout)   :: grids(:)
     TYPE(mg_info), INTENT(in)     :: info
     INTEGER, INTENT(in)           :: l
     LOGICAL, INTENT(in), OPTIONAL :: nluniq_in
 !
     DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:)
     DOUBLE PRECISION, POINTER             :: resid1d(:)
     INTEGER :: levels, k, m1, m2, r1, r2
     LOGICAL :: nluniq
 !
     levels = info%levels
     m1 = SIZE(grids(l)%v,1)
     m2 = SIZE(grids(l)%v,2)
     r1 = grids(l)%rank(1)     ! r1 = m1
     r2 = grids(l)%rank(2)     ! r2 = m2-1
     IF(PRESENT(nluniq_in)) THEN
        nluniq = nluniq_in
     ELSE
        nluniq = .TRUE.
     END IF
 !
     IF(l.EQ.levels) THEN
        grids(levels)%v1d = grids(levels)%f1d
        CALL direct_solve(grids(levels), grids(levels)%v1d, debug=.FALSE.)
     ELSE
        CALL relax(info%nu1)
        ALLOCATE(resid(m1,m2)); resid1d(1:r1*r2) => resid
        IF(ALLOCATED(grids(l)%mata)) THEN
           resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata, grids(l)%v1d)
        ELSE
           resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata_cds, grids(l)%v1d)
        END IF
 !
        grids(l+1)%f(:,:) = restrict_cyl(grids(l+1), resid, nluniq)
 !
        DEALLOCATE(resid)
        grids(l+1)%v1d = 0.0d0
 !
 !   Only 1 call to the coarsest level
        DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
           CALL mg_cyl(grids, info, l+1, nluniq)
        END DO
 !
        grids(l)%v(:,1:r2) = grids(l)%v(:,1:r2) + &
             &               prolong_cyl(grids(l+1),grids(l+1)%v, nluniq)
 !
        CALL relax(info%nu2)
     END IF
 !
   CONTAINS
     SUBROUTINE relax(nu)
       INTEGER, INTENT(in) :: nu
       SELECT CASE (TRIM(info%relax))
       CASE ("jac")
          IF(ALLOCATED(grids(l)%mata)) THEN
             CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
          ELSE
             CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
          END IF
       CASE ("gs")
          IF(ALLOCATED(grids(l)%mata)) THEN
             CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d)
          ELSE
             CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d)
         END IF
       CASE default
          PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
          STOP
       END SELECT
     END SUBROUTINE relax
   END SUBROUTINE mg_cyl
 !--------------------------------------------------------------------------------
   FUNCTION prolong_1d(matp,vcoarse) RESULT(vfine)
 !
 !   Prolongation
 !
     TYPE(gemat), INTENT(in) :: matp
     DOUBLE PRECISION, INTENT(in) :: vcoarse(:)
     DOUBLE PRECISION             :: vfine(matp%mrows)
 !
     vfine = vmx(matp,vcoarse)
   END FUNCTION prolong_1d
 !--------------------------------------------------------------------------------
   FUNCTION restrict_1d(matp,vfine) RESULT(vcoarse)
 !
 !   Restriction
 !
     TYPE(gemat), INTENT(in) :: matp
     DOUBLE PRECISION, INTENT(in) :: vfine(:)
     DOUBLE PRECISION             :: vcoarse(matp%ncols)
 !
     vcoarse = vmx(matp,vfine,'T')
   END FUNCTION restrict_1d
 !--------------------------------------------------------------------------------
   FUNCTION prolong_2d(matp,vcoarse) RESULT(vfine)
 !
 !   Prolongation
 !
     TYPE(gemat), INTENT(in) :: matp(2)
     DOUBLE PRECISION, INTENT(in)  :: vcoarse(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:)
 !
     DOUBLE PRECISION, POINTER     :: pmat1(:,:), pmat2(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
     INTEGER :: m1, m1p, m2, m2p
 !
     pmat1 => matp(1)%val
     pmat2 => matp(2)%val
     m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2)
     m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2)
     ALLOCATE(vfine(m1,m2))
     ALLOCATE(temp(m1,m2p))
 !
 !    Compute (P1) * V
 !
     CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, &
          &     temp, m1)
 !
 !    Compute (P1) * V * (P2)^T
 !
     CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, &
          &     vfine, m1)
 !
     DEALLOCATE(temp)
   END FUNCTION prolong_2d
 !--------------------------------------------------------------------------------
   FUNCTION prolong_2d_csr(matp,vcoarse) RESULT(vfine)
 !
 !   Prolongation using CSR prolongation matrix
 !
     TYPE(csr_mat), INTENT(in)     :: matp(2)
     DOUBLE PRECISION, INTENT(in)  :: vcoarse(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:)
 !
     DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
     INTEGER :: m1, m1p, m2, m2p
     INTEGER :: i, j, k, kk
 !
     m1 = matp(1)%mrows; m1p = matp(1)%ncols
     m2 = matp(2)%mrows; m2p = matp(2)%ncols
     ALLOCATE(vfine(m1,m2))
     ALLOCATE(temp(m1p,m2))
     temp = 0.0d0
     vfine = 0.0d0
 !
 !    Compute temp = V * (P2)^T
 !     t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2
 !
     DO j=1,m2
        DO kk=matp(2)%irow(j),matp(2)%irow(j+1)-1
           k = matp(2)%cols(kk)
           temp(:,j) = temp(:,j) + vcoarse(:,k)*matp(2)%val(kk)
        END DO
     END DO
 !
 !    Compute (P1) * V * (P2)^T
 !     V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2
 !
     DO i=1,m1
        DO kk=matp(1)%irow(i),matp(1)%irow(i+1)-1
           k = matp(1)%cols(kk)
           vfine(i,:) = vfine(i,:) + matp(1)%val(kk)*temp(k,:)
        END DO
     END DO
 !
     DEALLOCATE(temp)
   END FUNCTION prolong_2d_csr
 !--------------------------------------------------------------------------------
   FUNCTION restrict_2d(matp,vfine) RESULT(vcoarse)
 !
 !   Restriction
 !
     TYPE(gemat), INTENT(in) :: matp(2)
     DOUBLE PRECISION, INTENT(in)  :: vfine(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:)
 !
     DOUBLE PRECISION, POINTER     :: pmat1(:,:), pmat2(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
     DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
     INTEGER :: m1, m1p, m2, m2p
 !
     pmat1 => matp(1)%val
     pmat2 => matp(2)%val
     m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2)
     m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2)
     ALLOCATE(vcoarse(m1p,m2p))
     ALLOCATE(temp(m1p,m2))
 !
 !    Compute (P1)^T * V
 !
     CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, &
          &     temp, m1p)
 !
 !    Compute (P1)^T * V * P2
 !
     CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, &
          &     vcoarse, m1p)
 !
     DEALLOCATE(temp)
   END FUNCTION restrict_2d
 !--------------------------------------------------------------------------------
   FUNCTION restrict_2d_csr(matp,vfine) RESULT(vcoarse)
 !
 !   Restriction using CSR prolongation matrix
 !
     TYPE(csr_mat), INTENT(in)     :: matp(2)
     DOUBLE PRECISION, INTENT(in)  :: vfine(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:)
 !
     DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
     INTEGER :: m1, m1p, m2, m2p
     INTEGER :: i, ii, j, jj, k
 !
     m1 = matp(1)%mrows; m1p = matp(1)%ncols
     m2 = matp(2)%mrows; m2p = matp(2)%ncols
     ALLOCATE(vcoarse(m1p,m2p))
     ALLOCATE(temp(m1,m2p))
     temp = 0.0d0
     vcoarse = 0.0d0
 !
 !    Compute temp = V * (R2)^T = V * (P2)
 !     t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p
 !
     DO k=1,m2
        DO jj=matp(2)%irow(k),matp(2)%irow(k+1)-1
           j = matp(2)%cols(jj)
           temp(:,j) = temp(:,j) + vfine(:,k)*matp(2)%val(jj)
        END DO
     END DO
 !
 !    Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2)
 !     V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p
 !
     DO k=1,m1
        DO ii=matp(1)%irow(k),matp(1)%irow(k+1)-1
           i = matp(1)%cols(ii)
           vcoarse(i,:) = vcoarse(i,:) + matp(1)%val(ii)*temp(k,:)
        END DO
     END DO
 !
     DEALLOCATE(temp)
   END FUNCTION restrict_2d_csr
 !--------------------------------------------------------------------------------
   FUNCTION prolong_cyl(grid, vcoarse, nluniq) RESULT(vfine)
 !
 !   Prolongation (cylindrical case)
 !
     TYPE(grid2d)                    :: grid
     DOUBLE PRECISION, INTENT(inout) :: vcoarse(:,:)
     DOUBLE PRECISION, ALLOCATABLE   :: vfine(:,:)
     LOGICAL, INTENT(in)             :: nluniq
 !
     DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
     INTEGER :: m1, m1p, m2, m2p
     INTEGER :: i, j, k, kk
 !
     m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols
     m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols
     ALLOCATE(vfine(m1,m2))
     ALLOCATE(temp(m1p,m2))
     temp = 0.0d0
     vfine = 0.0d0
 !
     IF(nluniq) vcoarse(1,1:m2p-1) = vcoarse(1,m2p)
 !
 !    Compute temp = V * (P2)^T
 !     t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2
 !
     DO j=1,m2
        DO kk=grid%matp(2)%irow(j),grid%matp(2)%irow(j+1)-1
           k = grid%matp(2)%cols(kk)
           temp(:,j) = temp(:,j) + vcoarse(:,k)*grid%matp(2)%val(kk)
        END DO
     END DO
 !
 !    Compute (P1) * V * (P2)^T
 !     V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2
 !
     DO i=1,m1
        DO kk=grid%matp(1)%irow(i),grid%matp(1)%irow(i+1)-1
           k = grid%matp(1)%cols(kk)
           vfine(i,:) = vfine(i,:) + grid%matp(1)%val(kk)*temp(k,:)
        END DO
     END DO
 !!$!
 !!$!    Compute (P1) * V
 !!$!
 !!$    CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, &
 !!$         &     temp, m1)
 !!$!
 !!$!    Compute (P1) * V * (P2)^T
 !!$!
 !!$    CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, &
 !!$         &     vfine, m1)
 !
     IF(nluniq) THEN
        vcoarse(1,1:m2p-1) = vcoarse(1,1:m2p-1) - vcoarse(1,m2p)
        vfine(1,1:m2-1) =  vfine(1,1:m2-1) - vfine(1,m2)
     END IF
 !
     DEALLOCATE(temp)
   END FUNCTION prolong_cyl
 !--------------------------------------------------------------------------------
   FUNCTION restrict_cyl(grid, vfine, nluniq) RESULT(vcoarse)
 !
 !   Restriction (cylindrical case)
 !
     TYPE(grid2d)                    :: grid
     DOUBLE PRECISION, INTENT(inout) :: vfine(:,:)
     DOUBLE PRECISION, ALLOCATABLE   :: vcoarse(:,:)
     LOGICAL, INTENT(in)             :: nluniq
 !
     DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
     INTEGER :: m1, m1p, m2, m2p
     INTEGER :: i, ii, j, jj, k
 !
     m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols
     m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols
     ALLOCATE(vcoarse(m1p,m2p))
     ALLOCATE(temp(m1,m2p))
     temp = 0.0d0
     vcoarse = 0.0d0
 !
     IF(nluniq) vfine(1,1:m2) = vfine(1,m2)/REAL(m2,8)
 !
 !    Compute temp = V * (R2)^T = V * (P2)
 !     t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p
 !
     DO k=1,m2
        DO jj=grid%matp(2)%irow(k),grid%matp(2)%irow(k+1)-1
           j = grid%matp(2)%cols(jj)
           temp(:,j) = temp(:,j) + vfine(:,k)*grid%matp(2)%val(jj)
        END DO
     END DO
 !
 !    Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2)
 !     V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p
 !
     DO k=1,m1
        DO ii=grid%matp(1)%irow(k),grid%matp(1)%irow(k+1)-1
           i = grid%matp(1)%cols(ii)
           vcoarse(i,:) = vcoarse(i,:) + grid%matp(1)%val(ii)*temp(k,:)
        END DO
     END DO
 !
 !!$!    Compute (P1)^T * V
 !!$!
 !!$    CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, &
 !!$         &     temp, m1p)
 !!$!
 !!$!    Compute (P1)^T * V * P2
 !!$!
 !!$    CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, &
 !!$         &     vcoarse, m1p)
 !
     IF(nluniq) THEN
        vfine(1,m2) = SUM(vfine(1,1:m2)); vfine(1,1:m2-1) = 0.0d0
        vcoarse(1,m2p) =  SUM(vcoarse(1,1:m2p)); vcoarse(1,m2p-1) = 0.0d0
     END IF
 !
     DEALLOCATE(temp)
 !
   END FUNCTION restrict_cyl
 !--------------------------------------------------------------------------------
   SUBROUTINE massmat_ge(spl, alpha, matm)
 !
 !   Compute mass matrix
 !
     TYPE(spline1d), INTENT(in)   :: spl
     INTEGER, INTENT(in)          :: alpha
     TYPE(gemat), INTENT(out)     :: matm
 !
     INTEGER :: nrank, nx, nidbas, kl, ku
 !
     CALL get_dim(spl, nrank, nx, nidbas)
     kl=nidbas; ku=kl
     IF(spl%period) nrank = nx
     CALL init(nrank, 1, matm)
     CALL conmat(spl, matm, coefeq)
   CONTAINS
     SUBROUTINE coefeq(x, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x
       INTEGER, INTENT(out) :: idt(:), idw(:)
       DOUBLE PRECISION, INTENT(out) :: c(:)
       c(1) = x**alpha
       idt(1) = 0
       idw(1) = 0
     END SUBROUTINE coefeq
   END SUBROUTINE massmat_ge
 !--------------------------------------------------------------------------------
   SUBROUTINE massmat_gb(spl, alpha, matm)
 !
 !   Compute mass matrix
 !
     TYPE(spline1d), INTENT(in)   :: spl
     INTEGER, INTENT(in)          :: alpha
     TYPE(gbmat), INTENT(out)     :: matm
 !
     INTEGER :: nrank, nx, nidbas, kl, ku
 !
     CALL get_dim(spl, nrank, nx, nidbas)
     kl=nidbas; ku=kl
     CALL init(kl, ku, nrank, 1, matm)
     CALL conmat(spl, matm, coefeq)
   CONTAINS
     SUBROUTINE coefeq(x, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x
       INTEGER, INTENT(out) :: idt(:), idw(:)
       DOUBLE PRECISION, INTENT(out) :: c(:)
       c(1) = x**alpha
       idt(1) = 0
       idw(1) = 0
     END SUBROUTINE coefeq
   END SUBROUTINE massmat_gb
 !--------------------------------------------------------------------------------
   SUBROUTINE massmat_periodic(spl, alpha, matm)
 !
 !   Compute mass matrix (periodic case)
 !
     TYPE(spline1d), INTENT(in)      :: spl
     INTEGER, INTENT(in)             :: alpha
     TYPE(periodic_mat), INTENT(out) :: matm
 !
     INTEGER :: dim, nrank, nx, nidbas, kl, ku
 !
     CALL get_dim(spl, dim, nx, nidbas)
     kl=nidbas; ku=kl
     nrank = nx
     CALL init(kl, ku, nrank, 1, matm)
     CALL conmat(spl, matm, coefeq)
   CONTAINS
     SUBROUTINE coefeq(x, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x
       INTEGER, INTENT(out) :: idt(:), idw(:)
       DOUBLE PRECISION, INTENT(out) :: c(:)
       c(1) = x**alpha
       idt(1) = 0
       idw(1) = 0
     END SUBROUTINE coefeq
   END SUBROUTINE massmat_periodic
 !--------------------------------------------------------------------------------
   SUBROUTINE femat_2d_csr(spl, mat, coefeq, nterms, maxder_in, nat_order_in, &
        &                  noinit)
 !
 !   Compute 2d fe CSR matrix
 !
     TYPE(spline2d), INTENT(in)    :: spl
     TYPE(csr_mat), INTENT(inout)  :: mat
     INTEGER, INTENT(in)           :: nterms
     INTEGER, INTENT(in), OPTIONAL :: maxder_in(2)
     LOGICAL, INTENT(in), OPTIONAL :: nat_order_in
     LOGICAL, INTENT(in), OPTIONAL :: noinit
     INTERFACE
        SUBROUTINE coefeq(x, y, idt, idw, c)
          DOUBLE PRECISION, INTENT(in) :: x, y
          INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
          DOUBLE PRECISION, INTENT(out) :: c(:)
        END SUBROUTINE coefeq
     END INTERFACE
 !
     INTEGER :: nrank, ndim(2), nints(2), maxder(2)
     LOGICAL :: nat_order, run_init
 !
     CALL get_dim(spl, ndim, nints)
     IF(spl%sp2%period) THEN
        nrank = ndim(1)*nints(2)
     ELSE
        nrank = PRODUCT(ndim)
     END IF
 !
     maxder = 1;         IF(PRESENT(maxder_in)) maxder = maxder_in
     nat_order = .TRUE.; IF(PRESENT(nat_order_in)) nat_order = nat_order_in
 !
     run_init = .TRUE.
     IF(PRESENT(noinit)) run_init = .NOT.noinit
     IF(run_init)       CALL init(nrank, nterms, mat)
 !
     CALL conmat(spl, mat, coefeq, maxder, nat_order)
   END SUBROUTINE femat_2d_csr
 !--------------------------------------------------------------------------------
   SUBROUTINE femat_ge(spl, mat, coefeq)
 !
 !   Compute fe matrix
 !
     TYPE(spline1d), INTENT(in)   :: spl
     TYPE(gemat), INTENT(out)     :: mat
     INTERFACE
        SUBROUTINE coefeq(x, idt, idw, c)
          DOUBLE PRECISION, INTENT(in) :: x
          INTEGER, INTENT(out) :: idt(:), idw(:)
          DOUBLE PRECISION, INTENT(out) :: c(:)
        END SUBROUTINE coefeq
     END INTERFACE
 !
     INTEGER :: nrank, nx, nidbas, kl, ku
 !
     CALL get_dim(spl, nrank, nx, nidbas)
     kl=nidbas; ku=kl
     IF(spl%period) nrank = nx
     CALL init(nrank, 2, mat)
     CALL conmat(spl, mat, coefeq)
   END SUBROUTINE femat_ge
 !--------------------------------------------------------------------------------
   SUBROUTINE femat_gb(spl, mat, coefeq)
 !
 !   Compute fe matrix
 !
     TYPE(spline1d), INTENT(in)   :: spl
     TYPE(gbmat), INTENT(out)     :: mat
     INTERFACE
        SUBROUTINE coefeq(x, idt, idw, c)
          DOUBLE PRECISION, INTENT(in) :: x
          INTEGER, INTENT(out) :: idt(:), idw(:)
          DOUBLE PRECISION, INTENT(out) :: c(:)
        END SUBROUTINE coefeq
     END INTERFACE
 !
     INTEGER :: nrank, nx, nidbas, kl, ku
 !
     CALL get_dim(spl, nrank, nx, nidbas)
     kl=nidbas; ku=kl
     CALL init(kl, ku, nrank, 2, mat)
     CALL conmat(spl, mat, coefeq)
   END SUBROUTINE femat_gb
 !--------------------------------------------------------------------------------
   SUBROUTINE femat_periodic(spl, mat, coefeq)
 !
 !   Compute fe matrix
 !
     TYPE(spline1d), INTENT(in)      :: spl
     TYPE(periodic_mat), INTENT(out) :: mat
     INTERFACE
        SUBROUTINE coefeq(x, idt, idw, c)
          DOUBLE PRECISION, INTENT(in) :: x
          INTEGER, INTENT(out) :: idt(:), idw(:)
          DOUBLE PRECISION, INTENT(out) :: c(:)
        END SUBROUTINE coefeq
     END INTERFACE
 !
     INTEGER :: nrank, dim, nx, nidbas, kl, ku
 !
     CALL get_dim(spl, dim, nx, nidbas)
     kl=nidbas; ku=kl
     nrank = nx
     CALL init(kl, ku, nrank, 2, mat)
     CALL conmat(spl, mat, coefeq)
   END SUBROUTINE femat_periodic
 !--------------------------------------------------------------------------------
   SUBROUTINE ibcmat_1d(irow, mat)
 !
 !   Set BC at row irow to 0
 !
     INTEGER, INTENT(in)        :: irow
     TYPE(gbmat), INTENT(inout) :: mat
 !
     DOUBLE PRECISION :: a(mat%rank)
 !
     a(:)=0.0d0; a(irow)=1.0d0
     CALL putrow(mat, irow, a)
     CALL putcol(mat, irow, a)
   END SUBROUTINE ibcmat_1d
 !--------------------------------------------------------------------------------
  SUBROUTINE ibcmat_2d(grid, mat, nluniq_in)
 !
 !   Impose BC on matrix (asume natural ordering)
 !      I = (j-1)*N1 + i, i=1:N1, j=1:N2
 !
     TYPE(grid2d), INTENT(in)      :: grid
     TYPE(csr_mat), INTENT(inout)  :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nluniq_in
 !
     DOUBLE PRECISION :: temp(mat%rank)
     INTEGER :: n1e, n2e, nrank, i, j, irow, jcol
     LOGICAL :: nlper1, nlper2, nlcart, nluniq
 !
     n1e = grid%rank(1)
     n2e = grid%rank(2)
     nrank = mat%rank
     nlper1 = grid%spl%sp1%period
     nlper2 = grid%spl%sp2%period
     nlcart = (.NOT.nlper1) .AND. (.NOT.nlper2)
     IF(PRESENT(nluniq_in)) THEN
        nluniq = nluniq_in
     ELSE
        nluniq = .TRUE.
     END IF
 !
 ! BC at x=0   ! Dirichlet for Cartesian, unicity for cylindrical problem
     IF(nlcart) THEN
        i=1
        DO j=1,n2e
           irow = (j-1)*n1e + i
           temp = 0.0d0; temp(irow) = 1.0d0
           CALL putrow(mat, irow, temp)
           CALL putcol(mat, irow, temp)
        END DO
     ELSE
        i=1
        IF(nluniq) THEN
           CALL unicity
        END IF
     END IF
 !
 ! BC at x=1  ! For both Cartesian and cylindrical
     i=n1e
     DO j=1,n2e
        irow = (j-1)*n1e + i
        temp = 0.0d0; temp(irow) = 1.0d0
        CALL putrow(mat, irow, temp)
        CALL putcol(mat, irow, temp)
     END DO
 !
 ! BC at y=0   ! Only for Cartesian problem
     IF(nlcart) THEN
        j=1
        DO i=1,n1e
           irow = (j-1)*n1e + i
           temp = 0.0d0; temp(irow) = 1.0d0
           CALL putrow(mat, irow, temp)
           CALL putcol(mat, irow, temp)
        END DO
     END IF
 !
 ! BC at y=1   ! Only for Cartesian problem
     IF(nlcart) THEN
        j=n2e
        DO i=1,n1e
           irow = (j-1)*n1e + i
           temp = 0.0d0; temp(irow) = 1.0d0
           CALL putrow(mat, irow, temp)
           CALL putcol(mat, irow, temp)
        END DO
     END IF
 !
   CONTAINS
     SUBROUTINE unicity
       INTEGER          :: irow0, jcol0
       DOUBLE PRECISION :: temp_sum(mat%rank) 
 !
       irow0 = (n2e-1)*n1e + i
       jcol0 = irow0
 !
 !   Vertical sum
       temp_sum(:) = 0.0d0
       DO j=1,n2e
          irow = (j-1)*n1e + i
          temp = 0.0d0
          CALL getrow(mat, irow, temp)
          temp_sum(:) = temp_sum(:) + temp(:)
       END DO
       CALL putrow(mat, irow0, temp_sum)
 !
 !  Horizontal sum
       temp_sum(:) = 0.0d0
       DO j=1,n2e
          jcol = (j-1)*n1e + i
          temp = 0.0d0
          CALL getcol(mat, jcol, temp)
          temp_sum(:) = temp_sum(:) + temp(:)
       END DO
       CALL putcol(mat, jcol0, temp_sum)
 !
 !  The away operator
       DO j=1,n2e-1
          irow = (j-1)*n1e + i
          temp = 0.0d0; temp(irow) = 1.0d0
          CALL putrow(mat, irow, temp)
          CALL putcol(mat, irow, temp)
       END DO
     END SUBROUTINE unicity
   END SUBROUTINE ibcmat_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE mod_transf_full(mat,k)
 !
 !    Modify grid transfer matrix.
 !
     DOUBLE PRECISION, INTENT(inout) :: mat(:,:)
     INTEGER, INTENT(in) :: k
     INTEGER :: m, n
 !
     m=SIZE(mat,1)
     n=SIZE(mat,2)
 !
 ! Clear matrix small elements.
     WHERE( ABS(mat) < 1.d-8) mat=0.0d0
 !
 ! Left boundary
     IF(k.EQ.1 .OR. k.EQ.3) THEN
        mat(2:m,1) = 0.0d0
     END IF
 !
 ! Right boundary
     IF(k.EQ.2 .OR. k.EQ.3) THEN
        mat(1:m-1,n) = 0.0d0
     END IF
   END SUBROUTINE mod_transf_full
 !--------------------------------------------------------------------------------
   SUBROUTINE mod_transf_csr(mat,k)
 !
 !    Modify grid transfer matrix.
 !
     TYPE(csr_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in)          :: k
 !
     DOUBLE PRECISION :: acol(mat%mrows)
     INTEGER :: m, n
 !
     m=mat%mrows
     n=mat%ncols
 !
 ! Left boundary
     acol = 0.0d0
     IF(k.EQ.1 .OR. k.EQ.3) THEN
        CALL getele(mat, 1, 1, acol(1))
        CALL putcol(mat, 1, acol)
     END IF
 !
 ! Right boundary
     acol = 0.0d0
     IF(k.EQ.2 .OR. k.EQ.3) THEN
        CALL getele(mat, m, n, acol(m))
        CALL putcol(mat, n, acol)
     END IF
   END SUBROUTINE mod_transf_csr
 !--------------------------------------------------------------------------------
   SUBROUTINE calc_pmat(grid1, grid2, pmat, debug_in)
 !
 !   Compute prolongation matrix by collocation
 !
     TYPE(grid1d), INTENT(in)      :: grid1, grid2
     TYPE(gemat), INTENT(out)      :: pmat
     LOGICAL, OPTIONAL, INTENT(in) :: debug_in
 !
     TYPE(gemat) :: mat_interp
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), fun2(:,:)
     DOUBLE PRECISION :: xinter
     INTEGER, ALLOCATABLE :: jcol(:)
     INTEGER :: nidbas, nfine, ncoarse, nderv
     LOGICAL :: nlper, debug
     INTEGER :: i, i0, ii, k, irow
 !================================================================================
 !                       0. Prologue
 !
     debug = .FALSE.
     IF(PRESENT(debug_in)) debug = debug_in
 !
     nfine = grid1%n
     ncoarse = grid2%n
     nidbas = grid1%spl%order - 1
     nlper = grid1%spl%period
 !
     IF(nlper) THEN
        IF(ncoarse .LT. nidbas+1) THEN
           WRITE(*,'(/a)') '** NX/2 should be larger or equal to NIDBAS+1 **'
           STOP
        END IF
    END IF
 !
     IF(debug) THEN
        WRITE(*,'(/2(a,i0))') 'nfine = ', nfine, ', ncoarse = ', ncoarse
        IF(nlper) WRITE(*,'(a)') 'Grids are periodic!'
     END IF
 !
     ALLOCATE(jcol(0:nidbas))
     ALLOCATE(fun(0:nidbas,1))
     IF(nlper) THEN
        CALL init(ncoarse, 1, pmat, mrows=nfine)
        CALL init(nfine, 1, mat_interp)
     ELSE
        CALL init(ncoarse+nidbas, 1, pmat, mrows=nfine+nidbas)
        CALL init(nfine+nidbas, 1, mat_interp)
     END IF
 !================================================================================
 !                       1. Interpolation matrix
 !
     irow = 0
     i0 = 1
 !
 !    Left bound
     IF(.NOT.nlper) THEN 
        IF(MODULO(nidbas,2).EQ.1) THEN
           nderv = (nidbas-1)/2   ! ndidbas = 1, 3, 5, ...
           ALLOCATE(fun2(0:nidbas,nderv+1))
           CALL basfun(grid1%x(0), grid1%spl, fun2, 1)
           jcol = 1 + (/ (i, i=0,nidbas) /)
           DO k=1,nderv+1
              irow = irow+1
              mat_interp%val(irow,jcol) = fun2(0:nidbas,k)
           END DO
           i0 = 2  ! Skip the first grid point
        ELSE
           nderv = nidbas/2-1   ! ndidbas = 2, 4, ...
           ALLOCATE(fun2(0:nidbas,nderv+1))
           CALL basfun(grid1%x(0), grid1%spl, fun2, 1)
           jcol = 1 + (/ (i, i=0,nidbas) /)
           DO k=1,nderv+1
              irow = irow+1
              mat_interp%val(irow,jcol) = fun2(0:nidbas,k)
           END DO
        END IF
     END IF
     DO i=i0,nfine
        IF(MODULO(nidbas,2).EQ.0) THEN
           xinter = (grid1%x(i-1)+grid1%x(i))/2.0d0  ! Left bound of interval
        ELSE
           xinter = grid1%x(i-1)                     ! Left bound of interval
        END IF
        CALL basfun(xinter, grid1%spl, fun, i)
        irow = irow+1
        DO k=0,nidbas
           jcol(k) = i+k
        END DO
        IF(nlper) jcol = MODULO(jcol-1,nfine)+1
        mat_interp%val(irow,jcol) = fun(0:nidbas,1)
     END DO
 !
 !   Right bound
     IF(.NOT.nlper) THEN
        CALL basfun(grid1%x(nfine), grid1%spl, fun2, nfine)
        jcol = nfine + (/ (i, i=0,nidbas) /)
        DO k=nderv+1,1,-1
           irow = irow+1
           mat_interp%val(irow,jcol) = fun2(0:nidbas,k)
        END DO
     END IF
     IF(debug) CALL printmat('** Interpolation matrix **', mat_interp)
 !================================================================================
 !                       2. RHS matrix
 !
     irow = 0
     i0 = 1
     DO i=1,ncoarse
        ii = 2*i-1
        CALL comp_rhs(ii)
        CALL comp_rhs(ii+1)
     END DO
     IF(debug) CALL printmat('** RHS matrix **', pmat)
 !================================================================================
 !                       3. Compute prolongation matrix
 !
     CALL factor(mat_interp)
     CALL bsolve(mat_interp, pmat%val)
 !================================================================================
 !                       9. Epilogue
 !
     CALL destroy(mat_interp)
     DEALLOCATE(jcol)
     DEALLOCATE(fun)
     IF(ALLOCATED(fun2)) DEALLOCATE(fun2)
 !
   CONTAINS
     SUBROUTINE comp_rhs(ii)
       INTEGER, INTENT(in) :: ii
       INTEGER :: k
 !
 !    Left  bounds for non-periodic cases
       IF(.NOT.nlper .AND. ii.EQ.1) THEN
          CALL basfun(grid1%x(0), grid2%spl, fun2, 1)
          jcol = 1 + (/ (k, k=0,nidbas) /)
          DO k=1,nderv+1
             irow = irow+1
             pmat%val(irow,jcol) = fun2(0:nidbas,k)
          END DO
          IF(MODULO(nidbas,2).EQ.1) RETURN   ! Skip
       END IF
 !
       IF(MODULO(nidbas,2).EQ.0) THEN
          xinter = (grid1%x(ii-1)+grid1%x(ii))/2.0d0 ! Left bound of interval
       ELSE
          xinter = grid1%x(ii-1)                     ! Left bound of interval
       END IF
       CALL basfun(xinter, grid2%spl, fun, i)
       irow = irow+1
       DO k=0,nidbas
          jcol(k) = i+k
       END DO
       IF(nlper) jcol = MODULO(jcol-1,ncoarse)+1
       pmat%val(irow,jcol) = fun(0:nidbas,1)
 !
 !    Right  bounds for non-periodic cases
       IF(.NOT.nlper .AND. ii.EQ.nfine) THEN
          CALL basfun(grid1%x(nfine), grid2%spl, fun2, ncoarse)
          jcol = ncoarse + (/ (k, k=0,nidbas) /)
          DO k=nderv+1,1,-1
             irow = irow+1
             pmat%val(irow,jcol) = fun2(0:nidbas,k)
          END DO
       END IF
     END SUBROUTINE comp_rhs
   END SUBROUTINE calc_pmat
 !--------------------------------------------------------------------------------
   SUBROUTINE disrhs_1d(spl, farr, frhs)
 !
 !  Projection of RHS on spline basis functions
 !
     TYPE(spline1d)                :: spl
     DOUBLE PRECISION, INTENT(out) :: farr(:)
     INTERFACE
        DOUBLE PRECISION FUNCTION frhs(x)
          DOUBLE PRECISION, INTENT(in) :: x
        END FUNCTION frhs
     END INTERFACE
     DOUBLE PRECISION :: contrib
 !
     DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
     INTEGER :: ndim, n, nidbas, ng
     INTEGER :: i, ig, it, irow
     LOGICAL :: nlper
 !
     CALL get_dim(spl, ndim, n, nidbas)
     nlper = spl%period
     xg => spl%gausx ! xg(ng,n)
     wg => spl%gausw ! wg(ng,n)
     ng = SIZE(xg,1)
     ALLOCATE(fun(0:nidbas,1))
 !
     farr = 0.0d0
     DO i=1,n
        DO ig=1,ng
           CALL basfun(xg(ig,i), spl, fun, i)
           contrib = wg(ig,i)*frhs(xg(ig,i))
           DO it=0,nidbas
              irow = i+it
              IF(nlper) irow = MODULO(irow-1,n) +1
              farr(irow) = farr(irow)+contrib*fun(it,1)
           END DO
        END DO
     END DO
 !
     DEALLOCATE(fun)
   END SUBROUTINE disrhs_1d
 !--------------------------------------------------------------------------------
   SUBROUTINE disrhs_2d(spl, farr, frhs)
 !
 !  Projection of RHS on spline basis functions
 !
     TYPE(spline2d)                :: spl
     DOUBLE PRECISION, INTENT(out) :: farr(:,:)
     INTERFACE
        DOUBLE PRECISION FUNCTION frhs(x,y)
          DOUBLE PRECISION, INTENT(in) :: x,y
        END FUNCTION frhs
     END INTERFACE
     DOUBLE PRECISION :: contrib
 !
     DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:)
     DOUBLE PRECISION, POINTER :: xg2(:,:), wg2(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: fun2(:,:)
     INTEGER :: ndim1, n1, nidbas1, ng1
     INTEGER :: ndim2, n2, nidbas2, ng2
     INTEGER :: i1, ig1, it1, irow
     INTEGER :: i2, ig2, it2, jcol
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
 !
     xg1 => spl%sp1%gausx ! xg(ng,n)
     wg1 => spl%sp1%gausw ! wg(ng,n)
     ng1 = SIZE(xg1,1)
     xg2 => spl%sp2%gausx ! xg(ng,n)
     wg2 => spl%sp2%gausw ! wg(ng,n)
     ng2 = SIZE(xg2,1)
 !
     ALLOCATE(fun1(0:nidbas1,1))
     ALLOCATE(fun2(0:nidbas2,1))
 !
     farr = 0.0d0
     DO i1=1,n1
        DO ig1=1,ng1
           CALL basfun(xg1(ig1,i1), spl%sp1, fun1, i1)
           DO i2=1,n2
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2,i2), spl%sp2, fun2, i2)
                 contrib = wg1(ig1,i1)*wg2(ig2,i2)* &
                      & frhs(xg1(ig1,i1), xg2(ig2,i2))
                 DO it1=0,nidbas1
                    irow = i1+it1
                    DO it2=0,nidbas2
                       jcol = i2+it2
                       farr(irow,jcol) = farr(irow,jcol) + &
                            &   contrib*fun1(it1,1)*fun2(it2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !
 !  Case of periodic BC (only in 2nd dimension!)
 !
     IF(spl%sp2%period) THEN
        DO jcol=1,nidbas2
           farr(:,jcol) = farr(:,jcol)+farr(:,jcol+n2)
           farr(:,jcol+n2) = 0.0d0
        END DO
     END IF
     DEALLOCATE(fun1)
     DEALLOCATE(fun2)
   END SUBROUTINE disrhs_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE ibcrhs(grid, f, nluniq_in)
 !
 ! Impose BC on RHS
 !
     TYPE(grid2d)                    :: grid
     DOUBLE PRECISION, INTENT(inout) :: f(:,:)
     LOGICAL, INTENT(in), OPTIONAL   :: nluniq_in
 !
     DOUBLE PRECISION :: temp
     INTEGER :: n1, n2
     LOGICAL :: nlper1, nlper2, nlcyl, nluniq
 !
     n1 = grid%rank(1)
     n2 = grid%rank(2)
     nlper1 = grid%spl%sp1%period
     nlper2 = grid%spl%sp2%period
     nlcyl = (.NOT.nlper1) .AND. (nlper2)
     IF(PRESENT(nluniq_in)) THEN 
        nluniq = nluniq_in
     ELSE
        nluniq=.TRUE.
     END IF
 !
 !  Cylindrical case, unicity at the axis, 0 at the right side
     IF(nlcyl) THEN  ! 
        IF(nluniq) THEN
           temp = SUM(f(1,1:n2))
           f(1,n2) = temp
           f(1,1:n2-1) = 0.0d0
        END IF
        f(n1,1:n2) = 0.0d0
 !
 !  Cartesian case: 0 on all 4 boundaries
     ELSE
        f(1,:) = 0.0d0; f(n1,:) = 0.0d0
        f(:,1) = 0.0d0; f(:,n2) = 0.0d0
     END IF
   END SUBROUTINE ibcrhs
 !--------------------------------------------------------------------------------
   FUNCTION disc_err_1d(spl, f, fexact) RESULT(disc_err)
 !
 !  L2 norm of discretization error
 !
     TYPE(spline1d)               :: spl
     DOUBLE PRECISION, INTENT(in) :: f(:)
     DOUBLE PRECISION             :: disc_err
     INTERFACE
        FUNCTION fexact(x)
          DOUBLE PRECISION, INTENT(in) :: x(:)
          DOUBLE PRECISION :: fexact(SIZE(x))
        END FUNCTION fexact
     END INTERFACE
 !
     DOUBLE PRECISION, ALLOCATABLE :: err(:,:)
     DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:)
     INTEGER :: ndim, n, nidbas, ng
     INTEGER :: ig
 !
     CALL get_dim(spl, ndim, n, nidbas)
     xg => spl%gausx ! xg(ng,n)
     wg => spl%gausw ! wg(ng,n)
     ng = SIZE(xg,1)
 !
     ALLOCATE(err(ng,n))
     CALL gridval(spl, xg(1,:), err(1,:), 0, f)
     err(1,:) = (err(1,:) - fexact(xg(1,:)))**2
     DO ig=2,ng
        CALL gridval(spl, xg(ig,:), err(ig,:), 0)
        err(ig,:) = (err(ig,:) - fexact(xg(ig,:)))**2
     END DO
 !
     disc_err = SQRT(SUM(wg*err))
 !
     DEALLOCATE(err)
   END FUNCTION disc_err_1d
 !--------------------------------------------------------------------------------
   FUNCTION disc_err_2d(spl, f, fexact) RESULT(disc_err)
 !
 !  L2 norm of discretization error
 !
     TYPE(spline2d)               :: spl
     DOUBLE PRECISION, INTENT(in) :: f(:,:)
     DOUBLE PRECISION             :: disc_err
     INTERFACE
        FUNCTION fexact(x,y)
          DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
          DOUBLE PRECISION :: fexact(SIZE(x),SIZE(y))
        END FUNCTION fexact
     END INTERFACE
 !
     DOUBLE PRECISION, ALLOCATABLE :: err(:,:)
     DOUBLE PRECISION, POINTER :: xg(:,:), wg1(:,:), yg(:,:), wg2(:,:)
     INTEGER, DIMENSION(2) :: ndim, n, ng
     INTEGER :: i, j, ig, jg
     LOGICAL :: nlper1, nlper2, nlcyl
 !
     CALL get_dim(spl, ndim, n)
     xg => spl%sp1%gausx ! xg(ng,n)
     wg1 => spl%sp1%gausw ! wg(ng,n)
     ng(1) = SIZE(xg,1)
     yg => spl%sp2%gausx ! xg(ng,n)
     wg2 => spl%sp2%gausw ! wg(ng,n)
     ng(2) = SIZE(yg,1)
 !
     nlper1 = spl%sp1%period
     nlper2 = spl%sp2%period
     nlcyl = (.NOT.nlper1) .AND. (nlper2)
 !
     disc_err = 0.0d0
     ALLOCATE(err(n(1),n(2)))
     DO ig=1,ng(1)
        DO jg=1,ng(2)
           IF(ig.EQ.1.AND.jg.EQ.1) THEN
              CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0], f)
           ELSE
              CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0])
           END IF
           err = (err - fexact(xg(ig,:), yg(jg,:)))**2
           DO i=1,n(1)
              DO j=1,n(2)
                 IF(nlcyl) THEN
                    disc_err = disc_err + xg(ig,i)*wg1(ig,i)*wg2(jg,j)*err(i,j)
                 ELSE
                    disc_err = disc_err + wg1(ig,i)*wg2(jg,j)*err(i,j)
                 END IF
              END DO
           END DO
        END DO
     END DO
     disc_err = SQRT(disc_err)
 !
     DEALLOCATE(err)
   END FUNCTION disc_err_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE back_transf(grid, u, nluniq_in)
 !
 !   Back transform solution and use  periodicity (cylindrical problem)
 !
     TYPE(grid2d), INTENT(in)        :: grid
     DOUBLE PRECISION, INTENT(inout) :: u(:,:)
     LOGICAL, INTENT(in), OPTIONAL   :: nluniq_in
 !
     LOGICAL :: nluniq
     INTEGER :: n, nidbas, j
 !
     n = grid%n(2)
     nidbas = grid%spl%sp2%order-1
     IF(PRESENT(nluniq_in)) THEN
        nluniq = nluniq_in
     ELSE
        nluniq = .TRUE.
     END IF
 !
 !  Back transform
     IF(nluniq) THEN
        u(1,1:n-1) = u(1,n)
     END IF
 !
 !  Periodicity
     DO j=1,nidbas
        u(:,j+n) = u(:,j)
     END DO
   END SUBROUTINE back_transf
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION normf_gb(matm, f)
 !
 !    L2 norm of f represented by its expansion coefficients.
 !
     TYPE(gbmat), INTENT(in)      :: matm
     DOUBLE PRECISION, INTENT(in) :: f(:)
     normf_gb = SQRT(DOT_PRODUCT(f, vmx(matm,f)))
   END FUNCTION normf_gb
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION normf_ge(matm, f)
 !
 !    L2 norm of f represented by its expansion coefficients.
 !
     TYPE(gemat), INTENT(in)      :: matm
     DOUBLE PRECISION, INTENT(in) :: f(:)
     normf_ge = SQRT(DOT_PRODUCT(f, vmx(matm,f)))
   END FUNCTION normf_ge
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION residue_gen(grid, f, u, p)
 !
 !   Generic version of residue
 !
     TYPE(grid2d) :: grid
     DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
     DOUBLE PRECISION :: r(SIZE(f))
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
 !
     IF(ALLOCATED(grid%mata)) THEN
        residue_gen = residue_csr(grid%mata, f, u, p)
     ELSE
        residue_gen = residue_cds(grid%mata_cds, f, u, p)
     END IF
   END FUNCTION residue_gen
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION residue_csr(mat, f, u, p)
 !
 !    L2 norm of residue ||f-Av||
 !
     TYPE(csr_mat), INTENT(in)    :: mat
     DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
     DOUBLE PRECISION :: r(SIZE(f))
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
 !
     CHARACTER(len=4) :: norm_type
     norm_type = '2'
     IF(PRESENT(p)) norm_type = p
 !
     r = f-vmx(mat,u)
     SELECT CASE (norm_type)
     CASE('1') 
        residue_csr = SUM(ABS(r))
     CASE ('2')
        residue_csr = SQRT(DOT_PRODUCT(r,r))
     CASE ('inf')
        residue_csr = MAXVAL(ABS(r))
     END SELECT
   END FUNCTION residue_csr
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION residue_cds(mat, f, u, p)
 !
 !    L2 norm of residue ||f-Av||
 !
     TYPE(cds_mat), INTENT(in)    :: mat
     DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
     DOUBLE PRECISION :: r(SIZE(f))
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
 !
     CHARACTER(len=4) :: norm_type
     norm_type = '2'
     IF(PRESENT(p)) norm_type = p
 !
     r = f-vmx(mat,u)
     SELECT CASE (norm_type)
     CASE('1') 
        residue_cds = SUM(ABS(r))
     CASE ('2')
        residue_cds = SQRT(DOT_PRODUCT(r,r))
     CASE ('inf')
        residue_cds = MAXVAL(ABS(r))
     END SELECT
   END FUNCTION residue_cds
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION residue_ge(mat, f, u)
 !
 !    L2 norm of residue ||f-Av||
 !
     TYPE(gemat), INTENT(in)      :: mat
     DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
     DOUBLE PRECISION :: r(SIZE(f))
     r = f-vmx(mat,u)
     residue_ge = SQRT(DOT_PRODUCT(r,r))
   END FUNCTION residue_ge
 !--------------------------------------------------------------------------------
   DOUBLE PRECISION FUNCTION residue_gb(mat, f, u)
 !
 !    L2 norm of residue ||f-Av||
 !
     TYPE(gbmat), INTENT(in)      :: mat
     DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
     DOUBLE PRECISION :: r(SIZE(f))
     r = f-vmx(mat,u)
     residue_gb = SQRT(DOT_PRODUCT(r,r))
   END FUNCTION residue_gb
 !--------------------------------------------------------------------------------
   SUBROUTINE ctof_massmat(splf, splc, alpha, matm)
 !
 !   Compute coarse to fine mass matrix M(h,2h)
 !
     TYPE(spline1d), INTENT(in)   :: splf ! Spline on fine mesh
     TYPE(spline1d), INTENT(in)   :: splc ! Spline on coarse mesh
     INTEGER, INTENT(in)          :: alpha
     TYPE(gemat), INTENT(out)     :: matm
 !
     LOGICAL :: nlper
     INTEGER :: nf, nxf, nc, nxc, nidbas, kl, ku
     INTEGER :: ig, ngauss
     INTEGER :: i, ic, it, jw, irow, jcol
 !
     DOUBLE PRECISION              :: contrib
     DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:)
     DOUBLE PRECISION, ALLOCATABLE :: funf(:,:), func(:,:)
 !
     nlper = splf%period .OR. splc%period
     CALL get_dim(splf, nf, nxf, nidbas)
     CALL get_gauss(splf, ngauss)
     CALL get_dim(splc, nc, nxc, nidbas)
     kl=nidbas; ku=kl
     IF(nlper) THEN
        nf = nxf
        nc = nxc
     END IF
     CALL init(nc, 1, matm, mrows=nf)  ! Defne nf x nc matrix
 !
     ALLOCATE(xg(ngauss),wg(ngauss))
     ALLOCATE(funf(0:nidbas,1))
     ALLOCATE(func(0:nidbas,1))
     DO i=1,nxf
        ic = (i-1)/2+1
        CALL get_gauss(splf, ngauss, i, xg, wg)
        DO ig=1,ngauss
           CALL basfun(xg(ig), splf, funf, i)
           CALL basfun(xg(ig), splc, func, ic)
           DO it=0,nidbas
              DO jw=0,nidbas
                 contrib = wg(ig)*funf(it,1)*func(jw,1)*xg(ig)**alpha
                 irow = i+it;  IF(nlper) irow=MODULO(irow-1,nxf)+1
                 jcol = ic+jw; IF(nlper) jcol=MODULO(jcol-1,nxc)+1
                 CALL updtmat(matm, irow, jcol, contrib)
              END DO
           END DO
        END DO
     END DO
 !
     DEALLOCATE(xg,wg)
     DEALLOCATE(funf,func)
   END SUBROUTINE ctof_massmat
 !--------------------------------------------------------------------------------
   SUBROUTINE direct_solve_1d(grid, v)
 !
 !   1D direct solver
 !
     TYPE(grid1d), INTENT(inout)   :: grid
     DOUBLE PRECISION, INTENT(out) :: v(:)
     LOGICAL :: nlper
 !
     nlper = grid%spl%period
     IF(nlper) THEN
        IF(.NOT.ALLOCATED(grid%matap_copy)) THEN 
           ALLOCATE(grid%matap_copy)
           CALL mcopy(grid%matap, grid%matap_copy)
           CALL factor(grid%matap_copy)
        END IF
        CALL bsolve(grid%matap_copy, grid%f, v)
     ELSE
        IF(.NOT.ALLOCATED(grid%mata_copy)) THEN 
           ALLOCATE(grid%mata_copy)
           CALL mcopy(grid%mata, grid%mata_copy)
           CALL factor(grid%mata_copy)
        END IF
        CALL bsolve(grid%mata_copy, grid%f, v)
     END IF
   END SUBROUTINE direct_solve_1d
 !--------------------------------------------------------------------------------
   SUBROUTINE direct_solve_2d(grid, v, debug)
 !
 !   2D direct solver
 !
     TYPE(grid2d), INTENT(inout)     :: grid
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     LOGICAL, INTENT(in), OPTIONAL   :: debug
     LOGICAL :: dbg
 !
     dbg = .FALSE.
     IF(PRESENT(debug)) dbg=debug
 !
     IF(ALLOCATED(grid%mata)) THEN
        IF(.NOT.ALLOCATED(grid%mata%mumps)) THEN
           ALLOCATE(grid%mata%mumps)
           CALL csr2mumps(grid%mata, grid%mata%mumps)
           CALL factor(grid%mata%mumps, debug=dbg)
        END IF
        CALL bsolve(grid%mata%mumps, v, debug=dbg)
     ELSE
        IF(.NOT.ALLOCATED(grid%mata_cds%mumps)) THEN
           ALLOCATE(grid%mata_cds%mumps)
           CALL cds2mumps(grid%mata_cds, grid%mata_cds%mumps)
           CALL factor(grid%mata_cds%mumps, debug=dbg)
        END IF
        CALL bsolve(grid%mata_cds%mumps, v, debug=dbg)
     END IF
 !
 !  Only cylindrical case
     IF(.NOT.grid%spl%sp1%period .AND. grid%spl%sp2%period) THEN
     END IF
   END SUBROUTINE direct_solve_2d
 !--------------------------------------------------------------------------------
   SUBROUTINE jacobi_gb(mat, omega, nu, v, f)
 !
 !   Weighted Jacobi relaxation
 !
     TYPE(gbmat),INTENT(in) :: mat
     DOUBLE PRECISION, INTENT(in)    :: omega
     INTEGER, INTENT(in)             :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     DOUBLE PRECISION :: temp(SIZE(v))
     DOUBLE PRECISION :: inv_diag(SIZE(v))
     INTEGER          :: k, kl, ku, n, i, j, jmin, jmax
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
 !
     inv_diag(:) = omega/mat%val(kl+ku+1,:)
     DO k=1,nu
        DO i=1,n
           jmin = MAX(1,i-kl)
           jmax = MIN(n, i+ku)
           temp(i) = f(i)
           DO j=jmin,i-1
              temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j)
           END DO
           DO j=i+1,jmax
              temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j)
           END DO
           temp(i) = temp(i)*inv_diag(i)
        END DO
        v(:) = (1.d0-omega)*v(:) + temp(:)   
     END DO
   END SUBROUTINE jacobi_gb
 !--------------------------------------------------------------------------------
   SUBROUTINE jacobi_ge(mat, omega, nu, v, f)
 !
 !   Weighted Jacobi relaxation
 !
     TYPE(gemat),INTENT(in) :: mat
     DOUBLE PRECISION, INTENT(in)    :: omega
     INTEGER, INTENT(in)             :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     DOUBLE PRECISION :: temp(SIZE(v))
     DOUBLE PRECISION :: inv_diag(SIZE(v))
     INTEGER          :: k, n, i, j
 !
     n = mat%rank
 !
     DO i=1,n
        inv_diag(i) = omega/mat%val(i,i)
     END DO
 !
     DO k=1,nu
        DO i=1,n
           temp(i) = f(i)
           DO j=1,i-1
              temp(i) = temp(i) - mat%val(i,j)*v(j)
           END DO
           DO j=i+1,n
              temp(i) = temp(i) - mat%val(i-j+1,j)*v(j)
           END DO
           temp(i) = temp(i)*inv_diag(i)
        END DO
        v(:) = (1.d0-omega)*v(:) + temp(:)   
     END DO
   END SUBROUTINE jacobi_ge
 !--------------------------------------------------------------------------------
   SUBROUTINE jacobi_csr(mat, omega, nu, v, f)
 !
 !   Weighted Jacobi relaxation
 !
     TYPE(csr_mat),INTENT(in)        :: mat
     DOUBLE PRECISION, INTENT(in)    :: omega
     INTEGER, INTENT(in)             :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     DOUBLE PRECISION :: temp(SIZE(v))
     DOUBLE PRECISION :: inv_diag(SIZE(v))
     INTEGER          :: k, n, i, j, jcol
 !
     n = mat%rank
 !
     inv_diag(:) = omega/mat%val(mat%idiag)
     DO k=1,nu
        temp(:) = f(:)
        DO i=1,n
           DO j = mat%irow(i), mat%irow(i+1)-1
              jcol = mat%cols(j)
              IF(jcol.NE.i) THEN   ! The diagonal
                 temp(i) = temp(i) - mat%val(j)*v(jcol)
              END IF
           END DO
        END DO
        temp(:) = temp(:)*inv_diag(:)
        v(:)    = (1.d0-omega)*v(:) + temp(:)   
     END DO
   END SUBROUTINE jacobi_csr
 !--------------------------------------------------------------------------------
   SUBROUTINE jacobi_cds(mat, omega, nu, v, f)
 !
 !   Weighted Jacobi relaxation
 !
     TYPE(cds_mat),INTENT(in)        :: mat
     DOUBLE PRECISION, INTENT(in)    :: omega
     INTEGER, INTENT(in)             :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     DOUBLE PRECISION :: temp(SIZE(v))
     DOUBLE PRECISION :: inv_diag(SIZE(v))
     INTEGER          :: k, n, i, id, d
 !
     n = mat%rank
 !
     inv_diag(:) = omega/mat%val(:,mat%dists(0))
     DO k=1,nu
        temp(:) = f(:)
        DO id=-mat%kl,mat%ku  ! f - (L+U)*v
           IF(id.EQ.0) CYCLE
           d = mat%dists(id)
           DO i=MAX(1,1-d), MIN(n,mat%rank-d)
              temp(i) = temp(i) - mat%val(i,id)*v(i+d)
           END DO
        END DO
        temp(:) = temp(:)*inv_diag(:)
        v(:)    = (1.d0-omega)*v(:) + temp(:)   
     END DO
   END SUBROUTINE jacobi_cds
 !--------------------------------------------------------------------------------
   SUBROUTINE gs_gb(mat, nu, v, f)
 !
 !   Gauss-Seidel relaxation
 !
     TYPE(gbmat),INTENT(in) :: mat
     INTEGER, INTENT(in)    :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     INTEGER          :: k, kl, ku, n, i, j, jmin, jmax
     DOUBLE PRECISION :: inv_diag(SIZE(v))
 !
     kl = mat%kl
     ku = mat%ku
     n = mat%rank
 !
     inv_diag(:) = 1.d0/mat%val(kl+ku+1,:)
     DO k=1,nu
        DO i=1,n
           jmin = MAX(1,i-kl)
           jmax = MIN(n, i+ku)
           v(i) = f(i)
           DO j=jmin,i-1
              v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j)
           END DO
           DO j=i+1,jmax
              v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j)
           END DO
           v(i) = inv_diag(i)*v(i)
        END DO
     END DO
   END SUBROUTINE gs_gb
 !--------------------------------------------------------------------------------
   SUBROUTINE gs_ge(mat, nu, v, f)
 !
 !   Gauss-Seidel relaxation
 !
     TYPE(gemat),INTENT(in) :: mat
     INTEGER, INTENT(in)    :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     INTEGER          :: k, n, i, j
     DOUBLE PRECISION :: inv_diag(SIZE(v))
 !
     n = mat%rank
 !
     DO i=1,n
        inv_diag(i) = 1.d0/mat%val(i,i)
     END DO
     DO k=1,nu
        DO i=1,n
           v(i) = f(i)
           DO j=1,i-1
              v(i) = v(i) - mat%val(i,j)*v(j)
           END DO
           DO j=i+1,n
              v(i) = v(i) - mat%val(i,j)*v(j)
           END DO
           v(i) = inv_diag(i)*v(i)
        END DO
     END DO
   END SUBROUTINE gs_ge
 !--------------------------------------------------------------------------------
   SUBROUTINE gs_csr(mat, nu, v, f)
 !
 !   Gauss-Seidel relaxation
 !
     TYPE(csr_mat),INTENT(in)        :: mat
     INTEGER, INTENT(in)             :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     DOUBLE PRECISION :: inv_diag(SIZE(v))
     INTEGER          :: k, n, i, j, jcol
 !
     n = mat%rank
 !
     inv_diag(:) = 1.0d0/mat%val(mat%idiag)
     DO k=1,nu
        DO i=1,n
           v(i) = f(i)
           DO j = mat%irow(i), mat%irow(i+1)-1
              jcol = mat%cols(j)
              IF(jcol.NE.i) THEN   ! The diagonal
                 v(i) = v(i) - mat%val(j)*v(jcol)
              END IF
           END DO
           v(i) = v(i)*inv_diag(i)
        END DO
     END DO
   END SUBROUTINE gs_csr
 !--------------------------------------------------------------------------------
   SUBROUTINE gs_cds(mat, nu, v, f)
 !
 !   Gauss-Seidel relaxation
 !
     TYPE(cds_mat),INTENT(in)        :: mat
     INTEGER, INTENT(in)             :: nu
     DOUBLE PRECISION, INTENT(inout) :: v(:)
     DOUBLE PRECISION, INTENT(in)    :: f(:)
 !
     DOUBLE PRECISION :: temp(SIZE(v))
     DOUBLE PRECISION :: inv_diag(SIZE(v))
     INTEGER          :: k, n, i, id, d
 !
     n = mat%rank
 !
     inv_diag(:) = 1.0d0/mat%val(:,mat%dists(0))
     DO k=1,nu
 !
        temp(:) = f(:)
        DO id=1,mat%ku  ! t <- f - U*v
           d = mat%dists(id)
           DO i=MAX(1,1-d), MIN(n,n-d)
              temp(i) = temp(i) - mat%val(i,id)*v(i+d)
           END DO
        END DO
 !
        DO i=1,n        ! Solve (L+D)v=t
           v(i) = temp(i)
           DO id=-1,-mat%kl,-1
              d = mat%dists(id)
              IF(i+d.LT.1) EXIT
              v(i) = v(i) - mat%val(i,id)*v(i+d)
           END DO
           v(i) = v(i)*inv_diag(i)
        END DO
     END DO
   END SUBROUTINE gs_cds
 !--------------------------------------------------------------------------------
   SUBROUTINE printmat_mat(str, val)
 !
     CHARACTER(len=*), INTENT(in) :: str
     DOUBLE PRECISION, INTENT(in)  :: val(:,:)
     INTEGER :: mrows, ncols,i
     mrows=SIZE(val,1)
     ncols=SIZE(val,2)
     WRITE(*,'(/a)') TRIM(str)
     WRITE(*,'(2(a,i6))') 'M =', mrows, ', N =', ncols
     DO i=1,mrows
        WRITE(*,'(12(1pe12.3))') val(i,:)
     END DO
     WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', SUM(val,2)
     WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', SUM(val,1)
   END SUBROUTINE printmat_mat
 !--------------------------------------------------------------------------------
   SUBROUTINE printmat_ge(str, mat)
 !
     CHARACTER(len=*), INTENT(in) :: str
     TYPE(gemat), INTENT(in)      :: mat
     INTEGER :: i
     DOUBLE PRECISION :: arow(mat%ncols)
     DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows)
     sum_cols = 0.0d0
     arow = 0.0d0
     WRITE(*,'(/a)') TRIM(str)
     WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols
     DO i=1,mat%mrows
        CALL getrow(mat,i,arow)
        sum_rows(i) = SUM(arow)
        sum_cols(:) = sum_cols(:) + arow(:)
        WRITE(*,'(12(1pe12.3))') arow
     END DO
     WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows
     WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols
   END SUBROUTINE printmat_ge
 !--------------------------------------------------------------------------------
   SUBROUTINE printmat_gb(str, mat)
 !
     CHARACTER(len=*), INTENT(in) :: str
     TYPE(gbmat), INTENT(in)      :: mat
     INTEGER :: i
     DOUBLE PRECISION :: arow(mat%ncols)
     DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows)
     sum_cols = 0.0d0
     WRITE(*,'(/a)') TRIM(str)
     WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols
     DO i=1,mat%mrows
        CALL getrow(mat,i,arow)
        sum_rows(i) = SUM(arow)
        sum_cols(:) = sum_cols(:) + arow(:)
        WRITE(*,'(8(1pe12.3))') arow
     END DO
     WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows
     WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols
   END SUBROUTINE printmat_gb
 !--------------------------------------------------------------------------------
   SUBROUTINE printmat_periodic(str, mat)
 !
     CHARACTER(len=*), INTENT(in) :: str
     TYPE(periodic_mat), INTENT(in)      :: mat
     INTEGER :: i
     DOUBLE PRECISION :: arow(mat%mat%ncols)
     DOUBLE PRECISION :: sum_cols(mat%mat%ncols), sum_rows(mat%mat%mrows)
     sum_cols = 0.0d0
     WRITE(*,'(/a)') TRIM(str)
     WRITE(*,'(2(a,i6))') 'M =', mat%mat%mrows, ', N =', mat%mat%ncols
     DO i=1,mat%mat%mrows
        CALL getrow(mat,i,arow)
        sum_rows(i) = SUM(arow)
        sum_cols(:) = sum_cols(:) + arow(:)
        WRITE(*,'(8(1pe12.3))') arow
     END DO
     WRITE(*,'(a/(8(1pe12.3)))') 'Sum or rows', sum_rows
     WRITE(*,'(a/(8(1pe12.3)))') 'Sum or cols', sum_cols
   END SUBROUTINE printmat_periodic
 !--------------------------------------------------------------------------------
   SUBROUTINE printdiag_gb(str, mat)
 !
     CHARACTER(len=*), INTENT(in) :: str
     TYPE(gbmat), INTENT(in)      :: mat
     INTEGER :: ku, kl
     kl = mat%kl
     ku = mat%ku
     WRITE(*,'(a/(8(1pe12.3)))') str, mat%val(kl+ku+1,:)
   END SUBROUTINE printdiag_gb
 !--------------------------------------------------------------------------------
   INTEGER FUNCTION get_lmax(n)
     INTEGER, INTENT(in) :: n
     INTEGER :: l, ncur
     l=1
     ncur = n
     DO
        IF(ncur.EQ.2 .OR. MODULO(ncur,2).NE.0) EXIT  ! Minimum N is 2 or odd.
        l=l+1
        ncur = ncur/2
     END DO
     get_lmax = l
   END FUNCTION get_lmax
 !--------------------------------------------------------------------------------
   SUBROUTINE ibc_transf(grids, dir, k)
 !
 !   Impose BC on transfer matrix
 !
     TYPE(grid2d), INTENT(inout) :: grids(:)
     INTEGER, INTENT(in)         :: dir
     INTEGER, INTENT(in)         :: k
 !
     INTEGER :: levels, l
     levels = SIZE(grids)
     DO l=2,levels
      CALL mod_transf(grids(l)%matp(dir),k)
     END DO
   END SUBROUTINE ibc_transf
 !--------------------------------------------------------------------------------
 END MODULE multigrid
diff --git a/src/mumps_mod.f90 b/src/mumps_mod.f90
index fe23546..d13a8cc 100644
--- a/src/mumps_mod.f90
+++ b/src/mumps_mod.f90
@@ -1,1728 +1,1728 @@
 !>
 !> @file mumps_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE mumps_bsplines
 !
 !    MUMPS_BSPLINES: Simple interface to the sparse direct solver MUMPS
 !
 !    T.M. Tran, CRPP-EPFL
 !    June 2011
 !
   USE sparse
   IMPLICIT NONE
   INCLUDE 'dmumps_struc.h'
   INCLUDE 'zmumps_struc.h'
 !
   TYPE mumps_mat
      INTEGER :: rank, nnz
      INTEGER :: nterms, kmat
      INTEGER :: istart, iend
      INTEGER :: nnz_start, nnz_end, nnz_loc
      LOGICAL :: nlsym
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(spmat), POINTER      :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      INTEGER, POINTER          :: perm(:) => NULL()
      DOUBLE PRECISION, POINTER :: val(:) => NULL()
      TYPE(dmumps_struc)        :: mumps_par
   END TYPE mumps_mat
 !
   TYPE zmumps_mat
      INTEGER :: rank, nnz
      INTEGER :: nterms, kmat
      INTEGER :: istart, iend
      INTEGER :: nnz_start, nnz_end, nnz_loc
      LOGICAL :: nlsym
      LOGICAL :: nlherm
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(zspmat), POINTER   :: mat => NULL()
      INTEGER, POINTER        :: cols(:) => NULL()
      INTEGER, POINTER        :: irow(:) => NULL()
      INTEGER, POINTER        :: perm(:) => NULL()
      DOUBLE COMPLEX, POINTER :: val(:) => NULL()
      TYPE(zmumps_struc)      :: mumps_par
   END TYPE zmumps_mat
 !
   INTERFACE init
      MODULE PROCEDURE init_mumps_mat, init_zmumps_mat
   END INTERFACE init
 !
   INTERFACE clear_mat
      MODULE PROCEDURE clear_mumps_mat, clear_zmumps_mat
   END INTERFACE clear_mat
 !
   INTERFACE updtmat
      MODULE PROCEDURE updt_mumps_mat, updt_zmumps_mat
   END INTERFACE updtmat
 !
   INTERFACE putele
      MODULE PROCEDURE putele_mumps_mat, putele_zmumps_mat
   END INTERFACE putele
 !
   INTERFACE getele
      MODULE PROCEDURE getele_mumps_mat, getele_zmumps_mat
   END INTERFACE getele
 !
   INTERFACE putrow
      MODULE PROCEDURE putrow_mumps_mat, putrow_zmumps_mat
   END INTERFACE putrow
 !
   INTERFACE getrow
      MODULE PROCEDURE getrow_mumps_mat, getrow_zmumps_mat
   END INTERFACE getrow
 !
   INTERFACE putcol
      MODULE PROCEDURE putcol_mumps_mat, putcol_zmumps_mat
   END INTERFACE putcol
 !
   INTERFACE getcol
      MODULE PROCEDURE getcol_mumps_mat, getcol_zmumps_mat
   END INTERFACE getcol
 !
   INTERFACE get_count
      MODULE PROCEDURE get_count_mumps_mat, get_count_zmumps_mat
   END INTERFACE get_count
 !
   INTERFACE to_mat
      MODULE PROCEDURE to_mumps_mat, to_zmumps_mat
   END INTERFACE to_mat
 !
   INTERFACE reord_mat
      MODULE PROCEDURE reord_mumps_mat, reord_zmumps_mat
   END INTERFACE reord_mat
 !
   INTERFACE numfact
      MODULE PROCEDURE numfact_mumps_mat, numfact_zmumps_mat
   END INTERFACE numfact
 !
   INTERFACE factor
      MODULE PROCEDURE factor_mumps_mat, factor_zmumps_mat
   END INTERFACE factor
 !
   INTERFACE bsolve
      MODULE PROCEDURE bsolve_mumps_mat1,  bsolve_mumps_matn, &
           &           bsolve_zmumps_mat1, bsolve_zmumps_matn
   END INTERFACE bsolve
 !
   INTERFACE vmx
      MODULE PROCEDURE vmx_mumps_mat,  vmx_mumps_matn, &
           &           vmx_zmumps_mat, vmx_zmumps_matn
   END INTERFACE vmx
 !
   INTERFACE destroy
      MODULE PROCEDURE destroy_mumps_mat, destroy_zmumps_mat
  END INTERFACE destroy
 !
  INTERFACE putmat
     MODULE PROCEDURE put_mumps_mat, put_zmumps_mat
  END INTERFACE putmat
 !
  INTERFACE getmat
     MODULE PROCEDURE get_mumps_mat, get_zmumps_mat
  END INTERFACE getmat
 !
  INTERFACE mcopy
     MODULE PROCEDURE mcopy_mumps_mat, mcopy_zmumps_mat
  END INTERFACE mcopy
 !
  INTERFACE maddto
     MODULE PROCEDURE maddto_mumps_mat, maddto_zmumps_mat
  END INTERFACE maddto
 !
  INTERFACE psum_mat
     MODULE PROCEDURE psum_mumps_mat, psum_zmumps_mat
  END INTERFACE psum_mat
 !
  INTERFACE p2p_mat
     MODULE PROCEDURE p2p_mumps_mat, p2p_zmumps_mat
  END INTERFACE p2p_mat
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_mumps_mat(n, nterms, mat, kmat, nlsym, nlpos, &
        &                      nlforce_zero, comm_in)
 !
 !   Initialize an empty sparse mumps matrix
 !
     USE pputils2
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(mumps_mat)               :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm_in
 !
     INTEGER :: comm, nloc
 !
     comm = MPI_COMM_SELF    ! Default is serial!
     IF(PRESENT(comm_in)) comm = comm_in
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlpos = .TRUE.
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
 !    Matrix partition
 !
     CALL dist1d(comm, 1, n, mat%istart, nloc)
     mat%iend = mat%istart + nloc - 1
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat, mat%istart, mat%iend)
 !
 !    Initialize a MUMPS instance
 !
     mat%mumps_par%N = n
     mat%mumps_par%NZ = 0
     mat%mumps_par%COMM = comm
     mat%mumps_par%PAR = 1        ! Host involved in calculations
     IF(mat%nlsym) THEN
        IF(mat%nlpos) THEN
           mat%mumps_par%SYM = 1  ! symmetric, positive definite
        ELSE
           mat%mumps_par%SYM = 2  ! symmetric, indefinite
        END IF
     ELSE
        mat%mumps_par%SYM = 0     ! unsymmetric
     END IF
 !
     mat%mumps_par%JOB = -1        ! Init phase
     CALL dmumps(mat%mumps_par)
 !
 !    Nullify MUMPS pointers for distributed matrix
 !
     NULLIFY(mat%mumps_par%A_loc)
     NULLIFY(mat%mumps_par%IRN_loc)
     NULLIFY(mat%mumps_par%JCN_loc)
     NULLIFY(mat%mumps_par%RHS)
 !
   END SUBROUTINE init_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_zmumps_mat(n, nterms, mat, kmat, nlsym, nlherm, &
        &                      nlpos, nlforce_zero, comm_in)
 !
 !   Initialize an empty sparse mumps matrix
 !
     USE pputils2
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(zmumps_mat)              :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlherm
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm_in
 !
     INTEGER :: comm, nloc
 !
     comm = MPI_COMM_SELF    ! Default is serial!
     IF(PRESENT(comm_in)) comm = comm_in
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlherm = .FALSE.
     mat%nlpos = .TRUE.
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
 !    Matrix partition
 !
     CALL dist1d(comm, 1, n, mat%istart, nloc)
     mat%iend = mat%istart + nloc - 1
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat, mat%istart, mat%iend)
 !
 !    Initialize a MUMPS instance
 !
     mat%mumps_par%N = n
     mat%mumps_par%NZ = 0
     mat%mumps_par%COMM = comm
     mat%mumps_par%PAR = 1        ! Host involved in calculations
     mat%mumps_par%SYM = 0        ! General unsymmetric
     IF(mat%nlsym) THEN
        IF(mat%nlpos) THEN
           mat%mumps_par%SYM = 1  ! symmetric, positive definite
        ELSE
           mat%mumps_par%SYM = 2  ! symmetric, indefinite
        END IF
     END IF
 !
     mat%mumps_par%JOB = -1       ! Init phase
     CALL zmumps(mat%mumps_par)
 !
 ! WARNING: SYM=1 is currently (version 4.10.0) is treated as SYM=2.
 ! The Hermitian case is not implemented yet!
 !
 !    Nullify MUMPS pointers for distributed matrix
 !
     NULLIFY(mat%mumps_par%A_loc)
     NULLIFY(mat%mumps_par%IRN_loc)
     NULLIFY(mat%mumps_par%JCN_loc)
     NULLIFY(mat%mumps_par%RHS)
 !
   END SUBROUTINE init_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_mumps_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(mumps_mat) :: mat
 !
     mat%val = 0.0d0
   END SUBROUTINE clear_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_zmumps_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(zmumps_mat) :: mat
 !
     mat%val = (0.0d0, 0.0d0)
   END SUBROUTINE clear_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_mumps_mat(mat, i, j, val)
 !
 !   Update element Aij of mumps  matrix
 !
     TYPE(mumps_mat)              :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
        IF(i.GT.j) RETURN
     END IF
     IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
        WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
        WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
        STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i) - mat%nnz_start + 1
        e = mat%irow(i+1) - mat%nnz_start
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zmumps_mat(mat, i, j, val)
 !
 !   Update element Aij of mumps  matrix
 !
     TYPE(zmumps_mat)           :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
        IF(i.GT.j) RETURN
     END IF
     IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
        WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
        WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
        STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i) - mat%nnz_start + 1
        e = mat%irow(i+1) - mat%nnz_start
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_mumps_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(mumps_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
        END IF
     END IF
 !
 !    Do nothing if outside
     IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput) - mat%nnz_start + 1
        e = mat%irow(iput+1) - mat%nnz_start
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  val
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
           END IF
        END IF
    END IF
   END SUBROUTINE putele_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zmumps_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zmumps_mat)         :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     DOUBLE COMPLEX :: valput
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     valput = val
     IF(mat%nlherm .OR. mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
           IF(mat%nlherm) THEN
              valput = CONJG(val)
           ELSE
              valput = val
           END IF
        END IF
     END IF
 !
 !    Do nothing if outside
     IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, valput, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput) - mat%nnz_start + 1
        e = mat%irow(iput+1) - mat%nnz_start
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  valput
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
           END IF
        END IF
    END IF
  END SUBROUTINE putele_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_mumps_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(mumps_mat)               :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     val = 0.0d0   ! Assume zero val if outside
     IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, val)
     ELSE
        s = mat%irow(iget) - mat%nnz_start + 1
        e = mat%irow(iget+1) - mat%nnz_start
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           val =mat%val(s+k) 
        ELSE
           val = 0.0d0   ! Assume zero val if not found
        END IF
     END IF
   END SUBROUTINE getele_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zmumps_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(zmumps_mat)            :: mat
     INTEGER, INTENT(in)         :: i, j
     DOUBLE COMPLEX, INTENT(out) :: val
     DOUBLE COMPLEX :: valget
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlherm .OR. mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     val = (0.0d0, 0.0d0)   ! Assume zero val if outside
     IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, valget)
     ELSE
        s = mat%irow(iget) - mat%nnz_start + 1
        e = mat%irow(iget+1) - mat%nnz_start
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           valget =mat%val(s+k) 
        ELSE
           valget = (0.0d0,0.0d0)   ! Assume zero val if not found
        END IF
     END IF
     val = valget
     IF( i.GT.j ) THEN
        IF(mat%nlherm) THEN 
           val = CONJG(valget)
        END IF
     END IF
   END SUBROUTINE getele_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_mumps_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(mumps_mat), INTENT(inout)   :: amat
     INTEGER, INTENT(in)              :: i
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zmumps_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(zmumps_mat), INTENT(inout)   :: amat
     INTEGER, INTENT(in)               :: i
     DOUBLE COMPLEX, INTENT(in)        :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_mumps_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(mumps_mat), INTENT(in)    :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zmumps_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(zmumps_mat), INTENT(in)    :: amat
     INTEGER, INTENT(in)             :: i
     DOUBLE COMPLEX, INTENT(out)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_mumps_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(mumps_mat), INTENT(inout)   :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_zmumps_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(zmumps_mat), INTENT(inout)   :: amat
     INTEGER, INTENT(in)               :: j
     DOUBLE COMPLEX, INTENT(in)        :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_mumps_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(mumps_mat), INTENT(in)    :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_zmumps_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(zmumps_mat), INTENT(in)    :: amat
     INTEGER, INTENT(in)             :: j
     DOUBLE COMPLEX, INTENT(out)     :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_mumps_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(mumps_mat)                :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_mumps_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_mumps_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=mat%istart,mat%iend
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_zmumps_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(zmumps_mat)               :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_zmumps_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_zmumps_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=mat%istart,mat%iend
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_mumps_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to mumps matrice structure
 !
     INCLUDE 'mpif.h'
     TYPE(mumps_mat)               :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     INTEGER :: comm, ierr, nnz_loc
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
     comm = mat%mumps_par%COMM
     mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
 !
 !    Allocate the Mumps matrix structure
 !      CSR format: (cols, irow, val) or (JCN, irow, A)
 !      COO format: (IRN, JCN, A)  or (IRN, cols, val)
 !
     rank = mat%rank
     nnz_loc = get_count(mat)
     mat%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat%nnz_start = mat%nnz_start + 1
     mat%nnz_end = mat%nnz_start + nnz_loc - 1
     mat%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     mat%mumps_par%N = rank
     mat%mumps_par%NZ_loc = nnz_loc
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(nnz_loc))
     ALLOCATE(mat%cols(nnz_loc))
     ALLOCATE(mat%irow(mat%istart:mat%iend+1))
 !
     IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
     ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc))
     mat%mumps_par%A_loc => mat%val
     mat%mumps_par%JCN_loc => mat%cols
 !
 !    Fill Mumps structure and deallocate the sparse rows
 !
     mat%irow(mat%istart) = mat%nnz_start
     DO i=mat%istart,mat%iend
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i) - mat%nnz_start + 1
        e = mat%irow(i+1) - mat%nnz_start
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) 
        mat%mumps_par%IRN_loc(s:e) = i
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_zmumps_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to mumps matrice structure
 !
     INCLUDE 'mpif.h'
     TYPE(zmumps_mat)              :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     INTEGER :: comm, ierr, nnz_loc
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
     comm = mat%mumps_par%COMM
     mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
 !
 !    Allocate the Mumps matrix structure
 !      CSR format: (cols, irow, val) or (JCN, irow, A)
 !      COO format: (IRN, JCN, A)  or (IRN, cols, val)
 !
     rank = mat%rank
     nnz_loc = get_count(mat)
     mat%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat%nnz_start = mat%nnz_start + 1
     mat%nnz_end = mat%nnz_start + nnz_loc - 1
     mat%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     mat%mumps_par%N = rank
     mat%mumps_par%NZ_loc = nnz_loc
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(nnz_loc))
     ALLOCATE(mat%cols(nnz_loc))
     ALLOCATE(mat%irow(mat%istart:mat%iend+1))
 !
     IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
     ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc))
     mat%mumps_par%A_loc => mat%val
     mat%mumps_par%JCN_loc => mat%cols
 !
 !    Fill Mumps structure and deallocate the sparse rows
 !
     mat%irow(mat%istart) = mat%nnz_start
     DO i=mat%istart,mat%iend
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i) - mat%nnz_start + 1
        e = mat%irow(i+1) - mat%nnz_start
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) 
        mat%mumps_par%IRN_loc(s:e) = i
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_mumps_mat(mat, nlmetis, debug)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(mumps_mat)               :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
 !    Ordering
 !
     mat%mumps_par%ICNTL(7) = 7  ! Automatic choice
     IF(PRESENT(nlmetis)) THEN
        IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection
     END IF
 !
     mat%mumps_par%JOB = 1
     CALL dmumps(mat%mumps_par)
     mat%perm => mat%mumps_par%SYM_PERM
 
     IF(mat%mumps_par%INFOG(1).NE.0) THEN
        WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
        STOP
     END IF
   END SUBROUTINE reord_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_zmumps_mat(mat, nlmetis, debug)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(zmumps_mat)              :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
 !    Ordering
 !
     mat%mumps_par%ICNTL(7) = 7  ! Automatic choice
     IF(PRESENT(nlmetis)) THEN
        IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection
     END IF
 !
     mat%mumps_par%JOB = 1
     CALL zmumps(mat%mumps_par)
 
     IF(mat%mumps_par%INFOG(1).NE.0) THEN
        WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
        STOP
     END IF
   END SUBROUTINE reord_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_mumps_mat(mat, debug)
 !
 !   Numerical factorization
 !
     TYPE(mumps_mat)               :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
     mat%mumps_par%JOB = 2
     CALL dmumps(mat%mumps_par)
 
     IF(mat%mumps_par%INFOG(1).NE.0) THEN
        WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
        STOP
     END IF
 !
   END SUBROUTINE numfact_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_zmumps_mat(mat, debug)
 !
 !   Numerical factorization
 !
     TYPE(zmumps_mat)              :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: debug
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
     mat%mumps_par%JOB = 2
     CALL zmumps(mat%mumps_par)
 
     IF(mat%mumps_par%INFOG(1).NE.0) THEN
        WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
        STOP
     END IF
 !
   END SUBROUTINE numfact_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_mumps_mat(mat, nlreord, nlmetis, debug)
 !
 !  Factor (create  +reorder + factor) a mumps_mat matrix
 !
     TYPE(mumps_mat)             :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat, nlmetis, debug)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat, debug)
   END SUBROUTINE factor_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_zmumps_mat(mat, nlreord, nlmetis, debug)
 !
 !  Factor (create  +reorder + factor) a mumps_mat matrix
 !
     TYPE(zmumps_mat)            :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat, nlmetis, debug)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat, debug)
   END SUBROUTINE factor_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Mumps
 !
     INCLUDE 'mpif.h'
     TYPE(mumps_mat)                           :: mat
     DOUBLE PRECISION, INTENT(inout)           :: rhs(:)
     DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:)
     INTEGER, OPTIONAL, INTENT(in)             :: nref
     LOGICAL, OPTIONAL, INTENT(in)             :: debug
 !
     INTEGER :: nrank, ierr
 !
     nrank = SIZE(rhs,1)
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        mat%mumps_par%NRHS = 1
        mat%mumps_par%LRHS = nrank
        mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
        IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
 !
        ALLOCATE(mat%mumps_par%RHS(nrank))
        mat%mumps_par%RHS = rhs
     END IF
 !
     mat%mumps_par%JOB = 3
     CALL dmumps(mat%mumps_par)
 !
 !    The solution will be broadcasted to everyone
 !
     IF(PRESENT(sol)) THEN
        IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS
        CALL mpi_bcast(sol, nrank, MPI_DOUBLE_PRECISION, &
             &         0, mat%mumps_par%COMM, ierr)
     ELSE
        IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS
        CALL mpi_bcast(rhs, nrank, MPI_DOUBLE_PRECISION, &
             &         0, mat%mumps_par%COMM, ierr)
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        DEALLOCATE(mat%mumps_par%RHS)
        IF(mat%mumps_par%INFOG(1).NE.0) THEN
           WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
           STOP
        END IF
     END IF
   END SUBROUTINE bsolve_mumps_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zmumps_mat1(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Mumps
 !
     INCLUDE 'mpif.h'
     TYPE(zmumps_mat)                        :: mat
     DOUBLE COMPLEX, INTENT(inout)           :: rhs(:)
     DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:)
     INTEGER, OPTIONAL, INTENT(in)           :: nref
     LOGICAL, OPTIONAL, INTENT(in)           :: debug
 !
     INTEGER ::  nrank, ierr
 !
     nrank = SIZE(rhs,1)
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        mat%mumps_par%NRHS = 1
        mat%mumps_par%LRHS = nrank
        mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
        IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
 !
        ALLOCATE(mat%mumps_par%RHS(nrank))
        mat%mumps_par%RHS = rhs
     END IF
 !
     mat%mumps_par%JOB = 3
     CALL zmumps(mat%mumps_par)
 !
 !    The solution will be broadcasted to everyone
 !
     IF(PRESENT(sol)) THEN
        IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS
        CALL mpi_bcast(sol, SIZE(rhs), MPI_DOUBLE_COMPLEX, &
             &         0, mat%mumps_par%COMM, ierr)
     ELSE
        IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS
        CALL mpi_bcast(rhs, SIZE(rhs), MPI_DOUBLE_COMPLEX, &
             &         0, mat%mumps_par%COMM, ierr)
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        DEALLOCATE(mat%mumps_par%RHS)
        IF(mat%mumps_par%INFOG(1).NE.0) THEN
           WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
           STOP
        END IF
     END IF
   END SUBROUTINE bsolve_zmumps_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_mumps_matn(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Mumps
 !
     INCLUDE 'mpif.h'
     TYPE(mumps_mat)                           :: mat
     DOUBLE PRECISION, INTENT(inout)           :: rhs(:,:)
     DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:,:)
     INTEGER, OPTIONAL, INTENT(in)             :: nref
     LOGICAL, OPTIONAL, INTENT(in)             :: debug
 !
     INTEGER :: nrank, nrhs, ierr
 !
     nrank = SIZE(rhs,1)
     nrhs = SIZE(rhs,2)
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        mat%mumps_par%NRHS = nrhs
        mat%mumps_par%LRHS = nrank
        mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
        IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
 !
        ALLOCATE(mat%mumps_par%RHS(nrhs*nrank))
        mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS))
     END IF
 !
     mat%mumps_par%JOB = 3
     CALL dmumps(mat%mumps_par)
 !
 !    The solution will be broadcasted to everyone
 !
     IF(PRESENT(sol)) THEN
        IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol))
        CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, &
             &         0, mat%mumps_par%COMM, ierr)
     ELSE
        IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs))
        CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, &
             &         0, mat%mumps_par%COMM, ierr)
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        DEALLOCATE(mat%mumps_par%RHS)
        IF(mat%mumps_par%INFOG(1).NE.0) THEN
           WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
           STOP
        END IF
     END IF
   END SUBROUTINE bsolve_mumps_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zmumps_matn(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Mumps
 !
     INCLUDE 'mpif.h'
     TYPE(zmumps_mat)                        :: mat
     DOUBLE COMPLEX, INTENT(inout)           :: rhs(:,:)
     DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:,:)
     INTEGER, OPTIONAL, INTENT(in)           :: nref
     LOGICAL, OPTIONAL, INTENT(in)           :: debug
 !
     INTEGER :: nrank, nrhs, ierr
 !
     nrank = SIZE(rhs,1)
     nrhs = SIZE(rhs,2)
 !
 !    Verbose messages
 !
     mat%mumps_par%ICNTL(3) = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%mumps_par%ICNTL(3) = 6
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        mat%mumps_par%NRHS = nrhs
        mat%mumps_par%LRHS = nrank
        mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
        IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
 !
        ALLOCATE(mat%mumps_par%RHS(nrhs*nrank))
        mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS))
     END IF
 !
     mat%mumps_par%JOB = 3
     CALL zmumps(mat%mumps_par)
 !
 !    The solution will be broadcasted to everyone
 !
     IF(PRESENT(sol)) THEN
        IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol))
        CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, &
             &         0, mat%mumps_par%COMM, ierr)
     ELSE
        IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs))
        CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, &
             &         0, mat%mumps_par%COMM, ierr)
     END IF
 !
     IF(mat%mumps_par%MYID .EQ. 0) THEN
        DEALLOCATE(mat%mumps_par%RHS)
        IF(mat%mumps_par%INFOG(1).NE.0) THEN
           WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
           STOP
        END IF
     END IF
   END SUBROUTINE bsolve_zmumps_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_mumps_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(mumps_mat)               :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr))
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zmumps_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zmumps_mat)            :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr))
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
     CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        ELSE IF(mat%nlherm) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + CONJG(mat%val(j))*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_mumps_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(mumps_mat)               :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_mumps_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zmumps_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zmumps_mat)            :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
          &          beta, yarr, n)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        ELSE IF(mat%nlherm) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &               + CONJG(mat%val(j))*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zmumps_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_mumps_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(mumps_mat)  :: mat
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
     IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
     mat%mumps_par%JOB = -2
     CALL dmumps(mat%mumps_par)
     IF(mat%mumps_par%INFOG(1).NE.0) THEN
        WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
        STOP
     END IF
   END SUBROUTINE destroy_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zmumps_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(zmumps_mat) :: mat
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
     IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
     mat%mumps_par%JOB = -2
     CALL zmumps(mat%mumps_par)
     IF(mat%mumps_par%INFOG(1).NE.0) THEN
        WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', &
             &             mat%mumps_par%INFOG(1:2)
        STOP
     END IF
   END SUBROUTINE destroy_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_mumps_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(mumps_mat)                        :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
     CHARACTER(len=128) :: mumps_grp
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     mumps_grp = TRIM(label)//'/mumps_par'
     CALL creatg(fid, mumps_grp)
     CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
     CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
     CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
 !
   END SUBROUTINE put_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_zmumps_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zmumps_mat)                       :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
     CHARACTER(len=128) :: mumps_grp
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     mumps_grp = TRIM(label)//'/mumps_par'
     CALL creatg(fid, mumps_grp)
     CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
     CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
     CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
 !
   END SUBROUTINE put_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_mumps_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(mumps_mat)                        :: mat
     CHARACTER(len=128) :: mumps_grp
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     mumps_grp = TRIM(label)//'/mumps_par'
     CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
     CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
     CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
 !
   END SUBROUTINE get_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_zmumps_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zmumps_mat)                       :: mat
     CHARACTER(len=128) :: mumps_grp
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     mumps_grp = TRIM(label)//'/mumps_par'
     CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
     CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
     CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
 !
   END SUBROUTINE get_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_mumps_mat(mata, matb)
 !
 !   Matrix copy: B = A (assume that B is already initialize)
 !
     TYPE(mumps_mat) :: mata, matb
     INTEGER :: n, nnz, nnz_loc
 !
     IF(ASSOCIATED(matb%mat)) THEN  ! Sparse linled list not needed
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     nnz_loc = mata%nnz_loc
     matb%nnz = nnz
     matb%nnz_loc = nnz_loc
     matb%nnz_start = mata%nnz_start
     matb%nnz_end = mata%nnz_end
     matb%istart = mata%istart
     matb%iend = mata%iend
 !
     matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols
     ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow
 !
     ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc))
     matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc
     matb%mumps_par%A_loc => matb%val
     matb%mumps_par%JCN_loc => matb%cols
   END SUBROUTINE mcopy_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_zmumps_mat(mata, matb)
 !
 !   Matrix copy: B = A (assume that B is already initialize)
 !
     TYPE(zmumps_mat) :: mata, matb
     INTEGER :: n, nnz, nnz_loc
 !
     IF(ASSOCIATED(matb%mat)) THEN  ! Sparse linled list not needed
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     nnz_loc = mata%nnz_loc
     matb%nnz = nnz
     matb%nnz_loc = nnz_loc
     matb%nnz_start = mata%nnz_start
     matb%nnz_end = mata%nnz_end
     matb%istart = mata%istart
     matb%iend = mata%iend
 !
     matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols
     ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow
 !
     ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc))
     matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc
     matb%mumps_par%A_loc => matb%val
     matb%mumps_par%JCN_loc => matb%cols
   END SUBROUTINE mcopy_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_mumps_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(mumps_mat) :: mata, matb
     DOUBLE PRECISION  :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_zmumps_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zmumps_mat) :: mata, matb
     DOUBLE COMPLEX     :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_mumps_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(mumps_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_zmumps_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(zmumps_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_mumps_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(mumps_mat)            :: mat
     DOUBLE PRECISION, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_mumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_zmumps_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(zmumps_mat)           :: mat
     DOUBLE COMPLEX, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_zmumps_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE mumps_bsplines
diff --git a/src/p2p_mat.tpl b/src/p2p_mat.tpl
index c39e729..7d9e10e 100644
--- a/src/p2p_mat.tpl
+++ b/src/p2p_mat.tpl
@@ -1,119 +1,119 @@
 !>
 !> @file p2p_mat.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
     INTEGER, INTENT(in)          :: dest
     CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
     CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
     INTEGER, INTENT(in)          :: comm
 !
     INTEGER :: ierr
     INTEGER :: nrank, nnz, nnz_rem
     INTEGER :: i, s, idx, bufsize, position
     CHARACTER(len=1), ALLOCATABLE :: sbuf(:), rbuf(:)
     INTEGER, ALLOCATABLE :: irow(:), cols(:)
 !--------------------------------------------------------------------------
 !                    1.0  Prologue
 !
     nrank = mat%rank
     nnz = get_count(mat)
     CALL mpi_sendrecv(nnz, 1, MPI_INTEGER, dest, 0, &
          &            nnz_rem, 1, MPI_INTEGER, dest, 0, &
          &            comm, MPI_STATUS_IGNORE, ierr)
 !--------------------------------------------------------------------------
 !                    2.0  Send or sendrecv
 !
     IF(extyp.EQ.'send' .OR. extyp.EQ.'sendrecv') THEN
 !
 !    Allocate packed send buffer
        bufsize = 0
        CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
        CALL mpi_pack_size(nnz, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
        CALL mpi_pack_size(nnz, mpi_type, comm, s, ierr); bufsize=bufsize+s
        ALLOCATE(sbuf(bufsize))
 !
 !    Obtain matrix CSR arrays and pack
        CALL to_mat(mat, nlkeep=.TRUE.)
        position = 0
        CALL mpi_pack(mat%irow, nrank+1, MPI_INTEGER, sbuf, bufsize, position, comm, ierr)
        CALL mpi_pack(mat%cols, nnz, MPI_INTEGER, sbuf, bufsize, position, comm, ierr)
        CALL mpi_pack(mat%val, nnz, mpi_type, sbuf, bufsize, position, comm, ierr)
        DEALLOCATE(mat%irow)
        DEALLOCATE(mat%cols)
        DEALLOCATE(mat%val)
 !
 !   Communicate packed buffer
        IF(extyp.EQ.'send') THEN
           CALL mpi_send(sbuf, position, MPI_PACKED, dest, 0, comm, ierr)
           DEALLOCATE(sbuf)
        END IF
     END IF
 !--------------------------------------------------------------------------
 !                    3.0  Sendrecv or recv
 !
     IF(extyp.EQ.'recv' .OR. extyp.EQ.'sendrecv') THEN
 !
 !    Allocate unpacked received buffer
        bufsize = 0
        CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
        CALL mpi_pack_size(nnz_rem, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
        CALL mpi_pack_size(nnz_rem, mpi_type, comm, s, ierr); bufsize=bufsize+s
        ALLOCATE(rbuf(bufsize))
 !
 !    Communicate packed buffer
        IF(extyp.EQ.'recv') THEN
           CALL mpi_recv(rbuf, bufsize, MPI_PACKED, dest, 0, comm, MPI_STATUS_IGNORE, ierr)
        ELSE IF(extyp.EQ.'sendrecv') THEN
           CALL mpi_sendrecv(sbuf, position, MPI_PACKED, dest, 0, &
                &            rbuf, bufsize, MPI_PACKED, dest, 0, &
                &            comm, MPI_STATUS_IGNORE, ierr)
           DEALLOCATE(sbuf)
        END IF
 !
 !    Unpacked rbuf
        ALLOCATE(irow(nrank+1))
        ALLOCATE(cols(nnz_rem))
        ALLOCATE(val(nnz_rem))
        position = 0
        CALL mpi_unpack(rbuf, bufsize, position, irow, nrank+1, MPI_INTEGER, comm, ierr)
        CALL mpi_unpack(rbuf, bufsize, position, cols, nnz_rem, MPI_INTEGER, comm, ierr)
        CALL mpi_unpack(rbuf, bufsize, position, val, nnz_rem, mpi_type, comm, ierr)
        DEALLOCATE(rbuf)
 !
 !    Update/replace sparse matrix
        DO i=1,nrank
           DO idx=irow(i),irow(i+1)-1
              IF(op.EQ.'updt') THEN
                 CALL updtmat(mat, i, cols(idx), val(idx))
              ELSE IF(op.EQ.'put') THEN
                 CALL putele(mat, i, cols(idx), val(idx))
              END IF
           END DO
        END DO
        DEALLOCATE(irow)
        DEALLOCATE(cols)
        DEALLOCATE(val)
 !      
     END IF
 !--------------------------------------------------------------------------
diff --git a/src/pardiso_mod.f90 b/src/pardiso_mod.f90
index 4491b9c..b7fe14b 100644
--- a/src/pardiso_mod.f90
+++ b/src/pardiso_mod.f90
@@ -1,1605 +1,1605 @@
 !>
 !> @file pardiso_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pardiso_bsplines
 !
 !    PARDISO_BSPLINES: Simple interface to the sparse direct solver PARDISO
 !                      (MKL version).
 !
 !    T.M. Tran, CRPP-EPFL
 !    November 2010
 !
   USE sparse
   IMPLICIT NONE
 !
   TYPE pardiso_param
      INTEGER   :: error, mtype, msglvl, phase, maxfct, mnum, nrhs
      INTEGER   :: iparm(64)
      INTEGER*8 :: pt(64)
   END TYPE pardiso_param
 !
   TYPE pardiso_mat
      INTEGER :: rank, nnz
      INTEGER :: nterms, kmat
      LOGICAL :: nlsym
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(spmat), POINTER      :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      INTEGER, POINTER          :: perm(:) => NULL()
      DOUBLE PRECISION, POINTER :: val(:) => NULL()
      TYPE(pardiso_param)       :: p
   END TYPE pardiso_mat
 !
   TYPE zpardiso_mat
      INTEGER :: rank, nnz
      INTEGER :: nterms, kmat
      LOGICAL :: nlsym
      LOGICAL :: nlherm
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(zspmat), POINTER   :: mat => NULL()
      INTEGER, POINTER        :: cols(:) => NULL()
      INTEGER, POINTER        :: irow(:) => NULL()
      INTEGER, POINTER        :: perm(:) => NULL()
      DOUBLE COMPLEX, POINTER :: val(:) => NULL()
      TYPE(pardiso_param)     :: p
   END TYPE zpardiso_mat
 !
   INTERFACE init
      MODULE PROCEDURE init_pardiso_mat, init_zpardiso_mat
   END INTERFACE init
 !
   INTERFACE clear_mat
      MODULE PROCEDURE clear_pardiso_mat, clear_zpardiso_mat
   END INTERFACE clear_mat
 !
   INTERFACE updtmat
      MODULE PROCEDURE updt_pardiso_mat, updt_zpardiso_mat
   END INTERFACE updtmat
 !
   INTERFACE putele
      MODULE PROCEDURE putele_pardiso_mat, putele_zpardiso_mat
   END INTERFACE putele
 !
   INTERFACE getele
      MODULE PROCEDURE getele_pardiso_mat, getele_zpardiso_mat
   END INTERFACE getele
 !
   INTERFACE putrow
      MODULE PROCEDURE putrow_pardiso_mat, putrow_zpardiso_mat
   END INTERFACE putrow
 !
   INTERFACE getrow
      MODULE PROCEDURE getrow_pardiso_mat, getrow_zpardiso_mat
   END INTERFACE getrow
 !
   INTERFACE putcol
      MODULE PROCEDURE putcol_pardiso_mat, putcol_zpardiso_mat
   END INTERFACE putcol
 !
   INTERFACE getcol
      MODULE PROCEDURE getcol_pardiso_mat, getcol_zpardiso_mat
   END INTERFACE getcol
 !
   INTERFACE get_count
      MODULE PROCEDURE get_count_pardiso_mat, get_count_zpardiso_mat
   END INTERFACE get_count
 !
   INTERFACE to_mat
      MODULE PROCEDURE to_pardiso_mat, to_zpardiso_mat
   END INTERFACE to_mat
 !
   INTERFACE reord_mat
      MODULE PROCEDURE reord_pardiso_mat, reord_zpardiso_mat
   END INTERFACE reord_mat
 !
   INTERFACE numfact
      MODULE PROCEDURE numfact_pardiso_mat, numfact_zpardiso_mat
   END INTERFACE numfact
 !
   INTERFACE factor
      MODULE PROCEDURE factor_pardiso_mat, factor_zpardiso_mat
   END INTERFACE factor
 !
   INTERFACE bsolve
      MODULE PROCEDURE bsolve_pardiso_mat1,  bsolve_pardiso_matn, &
           &           bsolve_zpardiso_mat1, bsolve_zpardiso_matn
   END INTERFACE bsolve
 !
   INTERFACE vmx
      MODULE PROCEDURE vmx_pardiso_mat,  vmx_pardiso_matn, &
           &           vmx_zpardiso_mat, vmx_zpardiso_matn
   END INTERFACE vmx
 !
   INTERFACE destroy
      MODULE PROCEDURE destroy_pardiso_mat, destroy_zpardiso_mat
  END INTERFACE destroy
 !
  INTERFACE putmat
     MODULE PROCEDURE put_pardiso_mat, put_zpardiso_mat
  END INTERFACE putmat
 !
  INTERFACE getmat
     MODULE PROCEDURE get_pardiso_mat, get_zpardiso_mat
  END INTERFACE getmat
 !
  INTERFACE mcopy
     MODULE PROCEDURE mcopy_pardiso_mat, mcopy_zpardiso_mat
  END INTERFACE mcopy
 !
  INTERFACE maddto
     MODULE PROCEDURE maddto_pardiso_mat, maddto_zpardiso_mat
  END INTERFACE maddto
 !
  INTERFACE psum_mat
     MODULE PROCEDURE psum_pardiso_mat, psum_zpardiso_mat
  END INTERFACE psum_mat
 !
  INTERFACE p2p_mat
     MODULE PROCEDURE p2p_pardiso_mat, p2p_zpardiso_mat
  END INTERFACE p2p_mat
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_pardiso_mat(n, nterms, mat, kmat, nlsym, nlpos, &
        &                      nlforce_zero)
 !
 !   Initialize an empty sparse pardiso matrix
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(pardiso_mat)             :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlpos = .TRUE.
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat)
 !
     mat%p%iparm = 0
     CALL setup_pardiso(mat%p%iparm)
     mat%p%maxfct = 1  ! Max number of factorizations
     mat%p%mnum   = 1  ! Actual matrix, shoild be 1<= num <= maxfct
     mat%p%error  = 0  ! initialize error flag
     mat%p%msglvl = 1  ! print statistical information (0: no stat)
     IF(mat%nlsym) THEN
        IF(mat%nlpos) THEN
           mat%p%mtype = 2   ! symmetric, positive definite
        ELSE
           mat%p%mtype = -2  ! symmetric, indefinite
        END IF
     ELSE
        mat%p%mtype = 11  ! unsymmetric
     END IF
     mat%p%nrhs    = 1  ! number of RHSs
     mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle)
 !
   CONTAINS
     SUBROUTINE setup_pardiso(iparm)
       INTEGER :: iparm(:)
       iparm(1) = 1 ! no solver default
 !!$      iparm(2) = 2 ! fill-in reordering from METIS
       iparm(2) = 0 ! Minimum degree fill-in reordering
       iparm(3) = 1 ! numbers of processors
       iparm(4) = 0 ! no iterative-direct algorithm
       iparm(5) = 0 ! no user fill-in reducing permutation
       iparm(6) = 0 ! =0 solution on the first n compoments of x
       iparm(7) = 0 ! not in use
       iparm(8) = 9 ! numbers of iterative refinement steps
       iparm(9) = 0 ! not in use
       iparm(10) = 13 ! perturbe the pivot elements with 1E-13
       iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS
       iparm(12) = 0 ! not in use
       iparm(13) = 0 ! not in use
       iparm(14) = 0 ! Output: number of perturbed pivots
       iparm(15) = 0 ! not in use
       iparm(16) = 0 ! not in use
       iparm(17) = 0 ! not in use
       iparm(18) = -1 ! Output: number of nonzeros in the factor LU
       iparm(19) = -1 ! Output: Mflops for LU factorization
       iparm(20) = 0 ! Output: Numbers of CG Iterations
     END SUBROUTINE setup_pardiso
   END SUBROUTINE init_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_zpardiso_mat(n, nterms, mat, kmat, nlsym, nlherm, &
        &                       nlpos, nlforce_zero)
 !
 !   Initialize an empty sparse pardiso matrix
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(zpardiso_mat)            :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlherm
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlherm = .FALSE.
     mat%nlpos = .TRUE.
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlherm)) mat%nlherm = nlherm
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat)
 !
     mat%p%iparm = 0
     CALL setup_pardiso(mat%p%iparm)
     mat%p%maxfct = 1  ! Max number of factorizations
     mat%p%mnum   = 1  ! Actual matrix, shoild be 1<= num <= maxfct
     mat%p%error  = 0  ! initialize error flag
     mat%p%msglvl = 1  ! print statistical information (0: no stat)
     IF(mat%nlherm) THEN
        IF(mat%nlpos) THEN
           mat%p%mtype = 4   ! hermitian, positive definite
        ELSE
           mat%p%mtype = -4  ! hermitian, indefinite
        END IF
     ELSE IF(mat%nlsym) THEN
        mat%p%mtype = 6   ! symmetric
     ELSE
        mat%p%mtype = 13  ! unsymmetric
     END IF
     mat%p%nrhs    = 1  ! number of RHSs
     mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle)
 !
   CONTAINS
     SUBROUTINE setup_pardiso(iparm)
       INTEGER :: iparm(:)
       iparm(1) = 1 ! no solver default
 !!$      iparm(2) = 2 ! fill-in reordering from METIS
       iparm(2) = 0 ! Minimum degree fill-in reordering
       iparm(3) = 1 ! numbers of processors
       iparm(4) = 0 ! no iterative-direct algorithm
       iparm(5) = 0 ! no user fill-in reducing permutation
       iparm(6) = 0 ! =0 solution on the first n compoments of x
       iparm(7) = 0 ! not in use
       iparm(8) = 9 ! numbers of iterative refinement steps
       iparm(9) = 0 ! not in use
       iparm(10) = 13 ! perturbe the pivot elements with 1E-13
       iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS
       iparm(12) = 0 ! not in use
       iparm(13) = 0 ! not in use
       iparm(14) = 0 ! Output: number of perturbed pivots
       iparm(15) = 0 ! not in use
       iparm(16) = 0 ! not in use
       iparm(17) = 0 ! not in use
       iparm(18) = -1 ! Output: number of nonzeros in the factor LU
       iparm(19) = -1 ! Output: Mflops for LU factorization
       iparm(20) = 0 ! Output: Numbers of CG Iterations
     END SUBROUTINE setup_pardiso
   END SUBROUTINE init_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_pardiso_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(pardiso_mat) :: mat
 !
     mat%val = 0.0d0
     mat%perm = 0
   END SUBROUTINE clear_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_zpardiso_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(zpardiso_mat) :: mat
 !
     mat%val = (0.0d0, 0.0d0)
     mat%perm = 0
   END SUBROUTINE clear_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_pardiso_mat(mat, i, j, val)
 !
 !   Update element Aij of pardiso  matrix
 !
     TYPE(pardiso_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
        IF(i.GT.j) RETURN
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zpardiso_mat(mat, i, j, val)
 !
 !   Update element Aij of pardiso  matrix
 !
     TYPE(zpardiso_mat)         :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
        IF(i.GT.j) RETURN
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_pardiso_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(pardiso_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1)-1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  val
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
           END IF
        END IF
    END IF
   END SUBROUTINE putele_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zpardiso_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zpardiso_mat)         :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     DOUBLE COMPLEX :: valput
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     valput = val
     IF(mat%nlherm .OR. mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
           IF(mat%nlherm) THEN
              valput = CONJG(val)
           ELSE
              valput = val
           END IF
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, valput, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1)-1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  valput
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
           END IF
        END IF
    END IF
  END SUBROUTINE putele_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_pardiso_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(pardiso_mat)             :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, val)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1)-1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           val =mat%val(s+k) 
        ELSE
           val = 0.0d0   ! Assume zero val if not found
        END IF
     END IF
   END SUBROUTINE getele_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zpardiso_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(zpardiso_mat)          :: mat
     INTEGER, INTENT(in)         :: i, j
     DOUBLE COMPLEX, INTENT(out) :: val
     DOUBLE COMPLEX :: valget
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlherm .OR. mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, valget)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1)-1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           valget =mat%val(s+k) 
        ELSE
           valget = (0.0d0,0.0d0)   ! Assume zero val if not found
        END IF
     END IF
     val = valget
     IF( i.GT.j ) THEN
        IF(mat%nlherm) THEN 
           val = CONJG(valget)
        END IF
     END IF
   END SUBROUTINE getele_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_pardiso_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(pardiso_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: i
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zpardiso_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(zpardiso_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)               :: i
     DOUBLE COMPLEX, INTENT(in)        :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_pardiso_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(pardiso_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zpardiso_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(zpardiso_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)             :: i
     DOUBLE COMPLEX, INTENT(out)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_pardiso_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(pardiso_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_zpardiso_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(zpardiso_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)               :: j
     DOUBLE COMPLEX, INTENT(in)        :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_pardiso_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(pardiso_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_zpardiso_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(zpardiso_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)             :: j
     DOUBLE COMPLEX, INTENT(out)     :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_pardiso_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(pardiso_mat)              :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_pardiso_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_pardiso_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=1,mat%rank
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_zpardiso_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(zpardiso_mat)             :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_zpardiso_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_zpardiso_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=1,mat%rank
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_pardiso_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to pardiso matrice structure
 !
     TYPE(pardiso_mat)             :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
 !    Allocate the Pardiso matrix structure
 !
     nnz = get_count(mat)
     rank = mat%rank
     mat%nnz = nnz
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(nnz))
     ALLOCATE(mat%perm(rank))
     ALLOCATE(mat%irow(rank+1))
     ALLOCATE(mat%cols(nnz))
 !
 !    Fill Pardiso structure and optionnaly deallocate the sparse rows
 !
     mat%irow = 1
     DO i=1,rank
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_zpardiso_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to pardiso matrice structure
 !
     TYPE(zpardiso_mat) :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
 !    Allocate the Pardiso matrix structure
 !
     nnz = get_count(mat)
     rank = mat%rank
     mat%nnz = nnz
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(nnz))
     ALLOCATE(mat%perm(rank))
     ALLOCATE(mat%irow(rank+1))
     ALLOCATE(mat%cols(nnz))
 !
 !    Fill Pardiso structure and deallocate the sparse rows
 !
     mat%irow = 1
     DO i=1,rank
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_pardiso_mat(mat, nlmetis, debug)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(pardiso_mat)             :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE PRECISION :: dummy
 !
     mat%p%iparm(2) = 0                ! use minimum degree algorithm
     IF(PRESENT(nlmetis)) THEN
        IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection
     END IF
     mat%p%iparm(5)= 2   ! return the permutation vector in mat%perm
     mat%p%phase   = 11  ! Reordering and symbolic factorization
     mat%p%msglvl  = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%p%msglvl = 1
     END IF
     CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
          &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
          &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
          &       dummy, dummy, mat%p%error)
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error
     END IF
   END SUBROUTINE reord_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_zpardiso_mat(mat, nlmetis, debug)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(zpardiso_mat)            :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE COMPLEX :: dummy
 !
     mat%p%iparm(2) = 0                ! use minimum degree algorithm
     IF(PRESENT(nlmetis)) THEN
        IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection
     END IF
     mat%p%iparm(5)= 2   ! return the permutation vector in mat%perm
     mat%p%phase   = 11  ! Reordering and symbolic factorization
     mat%p%msglvl  = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%p%msglvl = 1
     END IF
     CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
          &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
          &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
          &       dummy, dummy, mat%p%error)
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error
     END IF
   END SUBROUTINE reord_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_pardiso_mat(mat, debug)
 !
 !   Numerical factorization
 !
     TYPE(pardiso_mat)             :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE PRECISION :: dummy
 !
     mat%p%phase  = 22 ! Numerical factorization
     mat%p%msglvl = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%p%msglvl = 1
     END IF
     CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
          &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
          &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
          &       dummy, dummy, mat%p%error)
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error
     END IF
   END SUBROUTINE numfact_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_zpardiso_mat(mat, debug)
 !
 !   Numerical factorization
 !
     TYPE(zpardiso_mat)            :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE COMPLEX :: dummy
 !
     mat%p%phase  = 22 ! Numerical factorization
     mat%p%msglvl = 0
     IF(PRESENT(debug)) THEN
        IF(debug) mat%p%msglvl = 1
     END IF
     CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
          &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
          &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
          &       dummy, dummy, mat%p%error)
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error
     END IF
   END SUBROUTINE numfact_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_pardiso_mat(mat, nlreord, nlmetis, debug)
 !
 !  Factor (create  +reorder + factor) a pardiso_mat matrix
 !
     TYPE(pardiso_mat)             :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat, nlmetis, debug)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat, debug)
   END SUBROUTINE factor_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_zpardiso_mat(mat, nlreord, nlmetis, debug)
 !
 !  Factor (create  +reorder + factor) a pardiso_mat matrix
 !
     TYPE(zpardiso_mat)            :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat, nlmetis, debug)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat, debug)
   END SUBROUTINE factor_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Pardiso
 !
     TYPE(pardiso_mat)             :: mat
     DOUBLE PRECISION              :: rhs(:)
     DOUBLE PRECISION, OPTIONAL    :: sol(:)
     INTEGER, OPTIONAL             :: nref
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE PRECISION :: dummy(SIZE(rhs))
 !
     mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) mat%p%iparm(8) = nref
     mat%p%phase = 33   ! Backsolve
     mat%p%nrhs = 1
     mat%p%msglvl = 0
     IF(PRESENT(debug)) THEN
        IF(debug) THEN 
           mat%p%msglvl = 1
        END IF
     END IF
     IF(PRESENT(sol)) THEN
        mat%p%iparm(6) = 0
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, sol, mat%p%error)
     ELSE
        mat%p%iparm(6) = 1
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, dummy, mat%p%error)
     END IF
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
     END IF
    END SUBROUTINE bsolve_pardiso_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zpardiso_mat1(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Pardiso
 !
     TYPE(zpardiso_mat)            :: mat
     DOUBLE COMPLEX                :: rhs(:)
     DOUBLE COMPLEX, OPTIONAL      :: sol(:)
     INTEGER, OPTIONAL             :: nref
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE COMPLEX :: dummy(SIZE(rhs))
 !
     mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) mat%p%iparm(8) = nref
     mat%p%phase = 33   ! Backsolve
     mat%p%nrhs = 1
     mat%p%msglvl = 0
     IF(PRESENT(debug)) THEN
        IF(debug) THEN 
           mat%p%msglvl = 1
        END IF
     END IF
     IF(PRESENT(sol)) THEN
        mat%p%iparm(6) = 0
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, sol, mat%p%error)
     ELSE
        mat%p%iparm(6) = 1
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, dummy, mat%p%error)
     END IF
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
     END IF
   END SUBROUTINE bsolve_zpardiso_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_pardiso_matn(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Pardiso, multiple RHS
 !
     TYPE(pardiso_mat)             :: mat
     DOUBLE PRECISION              :: rhs(:,:)
     DOUBLE PRECISION, OPTIONAL    :: sol(:,:)
     INTEGER, OPTIONAL             :: nref
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE PRECISION :: dummy(SIZE(rhs,1),SIZE(rhs,2))
 !
     mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) mat%p%iparm(8) = nref
     mat%p%phase = 33   ! Backsolve
     mat%p%nrhs = SIZE(rhs,2)
     mat%p%msglvl = 0
     IF(PRESENT(debug)) THEN
        IF(debug) THEN 
           mat%p%msglvl = 1
        END IF
     END IF
     IF(PRESENT(sol)) THEN
        mat%p%iparm(6) = 0
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, sol, mat%p%error)
     ELSE
        mat%p%iparm(6) = 1
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, dummy, mat%p%error)
     END IF
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
     END IF
   END SUBROUTINE bsolve_pardiso_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zpardiso_matn(mat, rhs, sol, nref, debug)
 !
 !   Backsolve, using Pardiso, multiple RHS
 !
     TYPE(zpardiso_mat)            :: mat
     DOUBLE COMPLEX                :: rhs(:,:)
     DOUBLE COMPLEX, OPTIONAL      :: sol(:,:)
     INTEGER, OPTIONAL             :: nref
     LOGICAL, OPTIONAL, INTENT(in) :: debug
     DOUBLE COMPLEX :: dummy(SIZE(rhs,1),SIZE(rhs,2))
 !
     mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) mat%p%iparm(8) = nref
     mat%p%phase = 33   ! Backsolve
     mat%p%nrhs = SIZE(rhs,2)
     mat%p%msglvl = 0
     IF(PRESENT(debug)) THEN
        IF(debug) THEN 
           mat%p%msglvl = 1
        END IF
     END IF
     IF(PRESENT(sol)) THEN
        mat%p%iparm(6) = 0
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, sol, mat%p%error)
     ELSE
        mat%p%iparm(6) = 1
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       rhs, dummy, mat%p%error)
     END IF
     IF(mat%p%error.NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
     END IF
   END SUBROUTINE bsolve_zpardiso_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_pardiso_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(pardiso_mat)             :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr))
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zpardiso_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zpardiso_mat)          :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr))
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
     CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        ELSE IF(mat%nlherm) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + CONJG(mat%val(j))*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_pardiso_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(pardiso_mat)             :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_pardiso_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zpardiso_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zpardiso_mat)          :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
          &          beta, yarr, n)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        ELSE IF(mat%nlherm) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &               + CONJG(mat%val(j))*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zpardiso_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_pardiso_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(pardiso_mat) :: mat
     DOUBLE PRECISION :: dummy
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
     IF(mat%p%phase .GT. 0) THEN
        mat%p%phase   = 0  ! Release memory for factors
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       dummy, dummy, mat%p%error)
        IF(mat%p%error.NE.0) THEN
           WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
   END SUBROUTINE destroy_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zpardiso_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(zpardiso_mat) :: mat
     DOUBLE COMPLEX :: dummy
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
     IF(mat%p%phase .GT. 0) THEN
        mat%p%phase   = 0  ! Release memory for factors
        CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
             &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
             &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
             &       dummy, dummy, mat%p%error)
        IF(mat%p%error.NE.0) THEN
           WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
   END SUBROUTINE destroy_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_pardiso_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(pardiso_mat)                      :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL creatg(fid, TRIM(label)//'/p')
     CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error)
     CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
     CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
     CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
     CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
     CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
     CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
     CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
   END SUBROUTINE put_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_zpardiso_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zpardiso_mat)                     :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL attach(fid, label, 'NLHERM', mat%nlherm)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL creatg(fid, TRIM(label)//'/p')
     CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error)
     CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
     CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
     CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
     CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
     CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
     CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
     CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
   END SUBROUTINE put_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_pardiso_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(pardiso_mat)                      :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error)
     CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
     CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
     CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
     CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
     CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
     CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
     CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
   END SUBROUTINE get_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_zpardiso_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zpardiso_mat)                      :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'SYM', mat%nlsym)
     CALL getatt(fid, label, 'HERM', mat%nlherm)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error)
     CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
     CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
     CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
     CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
     CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
     CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
     CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
   END SUBROUTINE get_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_pardiso_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(pardiso_mat) :: mata, matb
     INTEGER :: n, nnz
 !
 ! Assume that matb was already initialized by init_wsmp_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     matb%rank = n
     matb%nnz = nnz
     matb%nlsym = mata%nlsym
     matb%nlpos = mata%nlpos
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
     ALLOCATE(matb%val(nnz)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
     ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
     ALLOCATE(matb%perm(n))
   END SUBROUTINE mcopy_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_zpardiso_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(zpardiso_mat) :: mata, matb
     INTEGER :: n, nnz
 !
 ! Assume that matb was already initialized by init_wsmp_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     matb%rank = n
     matb%nnz = nnz
     matb%nlsym = mata%nlsym
     matb%nlherm = mata%nlherm
     matb%nlpos = mata%nlpos
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
     ALLOCATE(matb%val(nnz)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
     ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
     ALLOCATE(matb%perm(n))
   END SUBROUTINE mcopy_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_pardiso_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(pardiso_mat) :: mata, matb
     DOUBLE PRECISION  :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_zpardiso_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zpardiso_mat) :: mata, matb
     DOUBLE COMPLEX     :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_pardiso_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(pardiso_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_zpardiso_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(zpardiso_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_pardiso_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(pardiso_mat)            :: mat
     DOUBLE PRECISION, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_pardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_zpardiso_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(zpardiso_mat)           :: mat
     DOUBLE COMPLEX, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_zpardiso_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pardiso_bsplines
diff --git a/src/petsc_mod.F90 b/src/petsc_mod.F90
index 9909f17..acd8d8f 100644
--- a/src/petsc_mod.F90
+++ b/src/petsc_mod.F90
@@ -1,873 +1,873 @@
 !>
 !> @file petsc_mod.F90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE petsc_bsplines
 !
 !    PETSC_BSPLINES: Simple interface to the parallel iterative
 !                    solver PETSC
 !
 !    T.M. Tran, CRPP-EPFL
 !    June 2011
 !
   USE sparse
   IMPLICIT NONE
 
 #include "finclude/petsc.h90"
 
 !
   TYPE petsc_mat
      INTEGER :: rank
      INTEGER(8) :: nnz, nnz_loc
      INTEGER :: nterms, kmat
      INTEGER :: istart, iend
      INTEGER, POINTER :: rcounts(:) => NULL()
      INTEGER, POINTER :: rdispls(:) => NULL()
      INTEGER :: comm
      LOGICAL :: nlsym
      LOGICAL :: nlforce_zero
      TYPE(spmat), POINTER      :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      DOUBLE PRECISION, POINTER :: val(:) => NULL()
 !
      Mat :: AMAT
      KSP :: SOLVER
   END TYPE petsc_mat
 !
   INTERFACE init
      MODULE PROCEDURE init_petsc_mat
   END INTERFACE init
 !
   INTERFACE clear_mat
      MODULE PROCEDURE clear_petsc_mat
   END INTERFACE clear_mat
 !
   INTERFACE updtmat
      MODULE PROCEDURE updt_petsc_mat
   END INTERFACE updtmat
 !
   INTERFACE putele
      MODULE PROCEDURE putele_petsc_mat
   END INTERFACE putele
 !
   INTERFACE getele
      MODULE PROCEDURE getele_petsc_mat
   END INTERFACE getele
 !
   INTERFACE putrow
      MODULE PROCEDURE putrow_petsc_mat
   END INTERFACE putrow
 !
   INTERFACE getrow
      MODULE PROCEDURE getrow_petsc_mat
   END INTERFACE getrow
 !
   INTERFACE putcol
      MODULE PROCEDURE putcol_petsc_mat
   END INTERFACE putcol
 !
   INTERFACE getcol
      MODULE PROCEDURE getcol_petsc_mat
   END INTERFACE getcol
 !
   INTERFACE get_count
      MODULE PROCEDURE get_count_petsc_mat
   END INTERFACE get_count
 !
   INTERFACE to_mat
      MODULE PROCEDURE to_petsc_mat
   END INTERFACE to_mat
 !
   INTERFACE save_mat
      MODULE PROCEDURE save_petsc_mat
   END INTERFACE save_mat
 !
   INTERFACE load_mat
      MODULE PROCEDURE load_petsc_mat
   END INTERFACE load_mat
 !
   INTERFACE bsolve
      MODULE PROCEDURE bsolve_petsc_mat1,  bsolve_petsc_matn
   END INTERFACE bsolve
 !
   INTERFACE vmx
      MODULE PROCEDURE vmx_petsc_mat,  vmx_petsc_matn
   END INTERFACE vmx
 !
   INTERFACE destroy
      MODULE PROCEDURE destroy_petsc_mat
  END INTERFACE destroy
 !
  INTERFACE mcopy
     MODULE PROCEDURE mcopy_petsc_mat
  END INTERFACE mcopy
 !
  INTERFACE maddto
     MODULE PROCEDURE maddto_petsc_mat
  END INTERFACE maddto
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_petsc_mat(n, nterms, mat, kmat, nlsym, &
        &                      nlforce_zero, comm)
 !
 !   Initialize an empty sparse petsc matrix
 !
     USE pputils2
     INCLUDE 'mpif.h'
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(petsc_mat)               :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm
 !
     INTEGER :: me, npes
     INTEGER :: i, ierr, nloc
     PetscBool  :: flg
 !!$    PetscTruth  :: flg ! Petsc version before 3.2
 !
 !    Prologue
 !
     CALL mpi_comm_size(comm, npes, ierr)
     CALL mpi_comm_rank(comm, me, ierr)
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
 !    Inititialize the PETSC environment
 !
     IF(PRESENT(comm)) THEN
        PETSC_COMM_WORLD = comm
     ELSE  !  Single process Petsc
        PETSC_COMM_WORLD = MPI_COMM_SELF 
     END IF
     CALL PetscInitialize(PETSC_NULL_CHARACTER, ierr)
     mat%comm = PETSC_COMM_WORLD
 !
 !    Matrix partition
 !
     CALL dist1d(mat%comm, 1, n, mat%istart, nloc)
     mat%iend = mat%istart + nloc - 1
 !
     IF(ASSOCIATED(mat%rcounts)) DEALLOCATE(mat%rcounts)
     IF(ASSOCIATED(mat%rdispls)) DEALLOCATE(mat%rdispls)
     ALLOCATE(mat%rcounts(0:npes-1))
     ALLOCATE(mat%rdispls(0:npes-1))
     CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, &
        &          mat%comm, ierr)
     mat%rdispls(0) = 0
     DO i=1,npes-1
        mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1)
     END DO
 !
 !    Initialize linked list for sparse matrix
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat, mat%istart, mat%iend)
 !
 !    Create PETSC matrix
 !
     CALL MatCreate(mat%comm, mat%AMAT, ierr)
     CALL MatSetSizes(mat%AMAT, nloc, nloc, n, n, ierr)
     CALL MatSetFromOptions(mat%AMAT, ierr)
 !
 !   Create PETSC SOLVER
 !
     CALL KSPCreate(mat%comm, mat%SOLVER, ierr)
 !
   END SUBROUTINE init_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_petsc_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(petsc_mat) :: mat
 !
     IF(ASSOCIATED(mat%val)) THEN
        mat%val = 0.0d0
     ELSE
        CALL MatZeroEntries(mat%AMAT)
     END IF
   END SUBROUTINE clear_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_petsc_mat(mat, i, j, val)
 !
 !   Update element Aij of petsc  matrix
 !
     TYPE(petsc_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: ierr
 !
     IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
        IF(i.GT.j) RETURN
     END IF
     IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
        WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
        WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
        STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        CALL MatSetValue(mat%AMAT, i-1, j-1, ADD_VALUES, ierr)
     END IF
   END SUBROUTINE updt_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_petsc_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(petsc_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: iput, jput
     INTEGER :: ierr
 !
     iput = i
     jput = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
        END IF
     END IF
 !
 !    Do nothing if outside
     IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        CALL MatSetValue(mat%AMAT, iput-1, jput-1, val, INSERT_VALUES, ierr)
     END IF
   END SUBROUTINE putele_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_petsc_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(petsc_mat)             :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: iget, jget
     INTEGER :: ierr
 !
     iget = i
     jget = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     val = 0.0d0   ! Assume zero val if outside
     IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, val)
     ELSE
        CALL MatGetValues(mat%AMAT, 1, iget-1, 1, jget-1, val, ierr)
     END IF
   END SUBROUTINE getele_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_petsc_mat(amat, i, arr, cols)
 !
 ! Put a row into sparse matrix
 !
     TYPE(petsc_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(in)   :: arr(:)
     INTEGER, INTENT(in), OPTIONAL  :: cols(:)
     INTEGER :: j
 !
     IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! Do nothing
 !
     IF(PRESENT(cols)) THEN
        DO j=1,SIZE(cols)
           CALL putele(amat, i, cols(j), arr(j))
        END DO
     ELSE
        DO j=1,amat%rank
           CALL putele(amat, i, j, arr(j))
        END DO
     END IF
   END SUBROUTINE putrow_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_petsc_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(petsc_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: j, ierr
     INTEGER :: ncols, cols(amat%rank)
     DOUBLE PRECISION :: vals(amat%rank)
 !
     arr = 0.0d0
     IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! return 0 if outside
     IF(ASSOCIATED(amat%mat)) THEN
        DO j=1,amat%rank
           CALL getele(amat%mat, i, j, arr(j))
        END DO
     ELSE
        CALL MatGetRow(amat%AMAT, i-1, ncols, cols, vals,  ierr) ! 0-based array
        DO j=1,ncols
           arr(cols(j)+1) = vals(j)
        END DO
        CALL MatRestoreRow(amat%AMAT, i-1, ncols, cols, vals,  ierr)
     END IF
   END SUBROUTINE getrow_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_petsc_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(petsc_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_petsc_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(petsc_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     arr = 0.0d0
     DO i=amat%istart,amat%iend
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_petsc_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(petsc_mat)              :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i, ierr
     DOUBLE PRECISION :: info(MAT_INFO_SIZE)
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_petsc_mat = get_count(mat%mat, nnz)
     ELSE
        CALL MatGetInfo(mat%AMAT, MAT_LOCAL, info, ierr)
        get_count_petsc_mat = info(MAT_INFO_NZ_ALLOCATED)
 !!$       IF(PRESENT(nnz)) THEN
 !!$          DO i=1,mat%rank
 !!$             nnz(i) = mat%irow(i+1)-mat%irow(i)
 !!$          END DO
 !!$       END IF
     END IF
   END FUNCTION get_count_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_petsc_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to petsc matrice structure
 !
     TYPE(petsc_mat) :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
 !
     INTEGER :: me, i, j, jj, nnz, rank, s, e
     INTEGER :: istart, iend
     INTEGER :: iloc, k1, k2, ncol
     INTEGER :: d_nz, d_nnz(mat%istart:mat%iend)
     INTEGER :: o_nz, o_nnz(mat%istart:mat%iend)
     INTEGER :: comm, ierr
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
     comm = mat%comm
     CALL mpi_comm_rank(comm, me, ierr)
 !
 !    Allocate the Petsc matrix structure
 !
     rank = mat%rank
     mat%nnz_loc = get_count(mat)
     istart = mat%istart
     iend = mat%iend
     CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, comm, ierr)
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(mat%nnz_loc))
     ALLOCATE(mat%cols(mat%nnz_loc))
     ALLOCATE(mat%irow(mat%istart:mat%iend+1))
 !
 !    Get Sparse structure from linked list
 !
     d_nnz(:) = 0
     o_nnz(:) = 0
     mat%irow(istart) = 1
     DO i=istart,iend
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        d_nnz(i) = COUNT(mat%cols(s:e) .GE. istart .AND. &
             &           mat%cols(s:e) .LE. iend)
        o_nnz(i) = mat%mat%row(i)%nnz - d_nnz(i)
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
 !
 !    Petsc matrix preallocation
 !
     CALL MatMPIAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, &
          &      d_nnz, PETSC_NULL_INTEGER,  o_nnz, ierr)
     CALL MatSeqAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, &
          &      d_nnz, ierr)
 !
 !    Petsc matrix assembly  
 !
     mat%cols = mat%cols-1   ! Start column index = 0
     DO i=istart,iend
        iloc = i-istart+1
        k1 = mat%irow(i)
        k2 = mat%irow(i+1)
        ncol = k2-k1
        CALL MatSetValues(mat%AMAT, 1, i-1, ncol, mat%cols(k1:k2-1), &
           &            mat%val(k1:k2-1), INSERT_VALUES, ierr)
     END DO
 !
     CALL MatAssemblyBegin(mat%AMAT, MAT_FINAL_ASSEMBLY ,ierr)
     CALL MatAssemblyEnd(mat%AMAT, MAT_FINAL_ASSEMBLY, ierr)
 !
     DEALLOCATE(mat%irow)
     DEALLOCATE(mat%cols)
     DEALLOCATE(mat%val)
 !
   END SUBROUTINE to_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE save_petsc_mat(mat, file)
 !
 !   Save matrix in PETSC binary format
 !
     TYPE(petsc_mat) :: mat
     CHARACTER(len=*), INTENT(in) :: file
 !
     INTEGER :: ierr
     PetscViewer ::  viewer
 !
     CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_WRITE,&
          &     viewer, ierr)
     CALL MatView(mat%AMAT, viewer, ierr)
     CALL PetscViewerDestroy(viewer, ierr)
 !    
   END SUBROUTINE save_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE load_petsc_mat(mat, file)
 !
 !   Load matrix in PETSC binary format
 !
     TYPE(petsc_mat) :: mat
     CHARACTER(len=*), INTENT(in) :: file
 !
     INTEGER :: nloc, i, npes, ierr
     PetscViewer ::  viewer
 !
     CALL mpi_comm_size(mat%comm, npes, ierr)
 !
 !   Clean up unneeded sparse matrix
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
 !   Load matrix from file
 !
     CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_READ,&
          &     viewer, ierr)
     CALL MatLoad(mat%AMAT, viewer, ierr)
     CALL PetscViewerDestroy(viewer, ierr)
 !
 !  Some mat info
 !
     CALL MatGetSize(mat%AMAT, mat%rank, PETSC_NULL_INTEGER, ierr)
     mat%nnz_loc = get_count(mat)
     CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, &
          &    mat%comm, ierr)
 !
 !
 !  Recompute matrix partition from loaded matrix
 !
     CALL MatGetOwnershipRange(mat%AMAT, mat%istart, mat%iend, ierr)
     mat%istart = mat%istart+1   ! Convert from Petsc definition
     nloc = mat%iend - mat%istart + 1
     CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, &
        &          mat%comm, ierr)
     mat%rdispls(0) = 0
     DO i=1,npes-1
        mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1)
     END DO
 !    
   END SUBROUTINE load_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_petsc_mat1(mat, rhs, sol, rtol_in, nitmax_in, nits)
 !
 !   Backsolve, using Petsc
 !
     TYPE(petsc_mat) :: mat
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     DOUBLE PRECISION, INTENT(out), OPTIONAL :: sol(:)
     DOUBLE PRECISION, INTENT(in), OPTIONAL  :: rtol_in
     INTEGER, INTENT(in), OPTIONAL :: nitmax_in
     INTEGER, INTENT(out), OPTIONAL :: nits
 !
     DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION
     INTEGER :: nitmax=PETSC_DEFAULT_INTEGER
     INTEGER :: i, istart, iend, nrank_loc, nrank
     INTEGER :: npes, me, ierr
     INTEGER :: idx(mat%istart:mat%iend)
 !
     Vec :: vec_rhs, vec_sol
     PetscScalar :: scal
     PetscScalar, POINTER :: psol_loc(:)
     KSPConvergedReason :: reason
 !
     CALL mpi_comm_size(mat%comm, npes, ierr)
     CALL mpi_comm_rank(mat%comm, me, ierr)
 !
     istart = mat%istart
     iend = mat%iend
     nrank_loc = iend-istart+1
     nrank = mat%rank
     idx = (/ (i, i=istart,iend) /) - 1  ! 0-based petsc vector
 !
 !      Create Vectors
 !
     CALL VecCreate(mat%comm, vec_rhs, ierr)
     CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr)
     CALL VecSetFromOptions(vec_rhs, ierr)
     CALL VecDuplicate(vec_rhs, vec_sol, ierr)
 !
 !     Set solver
 !
     IF(PRESENT(rtol_in)) rtol = rtol_in
     IF(PRESENT(nitmax_in)) nitmax = nitmax_in
 !
     CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr)
     CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,&
          &  PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr)
     CALL KSPSetFromOptions(mat%SOLVER, ierr)
 !
 !     Set RHS
 !
     CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart), INSERT_VALUES, ierr)
     CALL VecAssemblyBegin(vec_rhs, ierr)
     CALL VecAssemblyEnd(vec_rhs, ierr)
 !
     CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr)
     CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr)
     IF(reason .LT. 0) THEN
        IF(me.EQ.0) WRITE(*,'(a,i4)') 'BSOLVE: diverges with reason', reason
     END IF
     IF(PRESENT(nits)) THEN
        CALL KSPGetIterationNumber(mat%SOLVER, nits, ierr)
     END IF
 !
 !     Get global solutions on all MPI processes
 !
     CALL VecGetArrayF90(vec_sol, psol_loc, ierr)
 !
     IF(PRESENT(sol)) THEN
        CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
             &      sol, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
             &      mat%comm, ierr)
     ELSE
        CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
             &      rhs, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
             &      mat%comm, ierr)
     END IF
 !
     CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr)
    END SUBROUTINE bsolve_petsc_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_petsc_matn(mat, rhs, sol, rtol_in, nitmax_in, nits)
 !
 !   Backsolve, using Petsc, multiple RHS
 !
     TYPE(petsc_mat)  :: mat
     DOUBLE PRECISION :: rhs(:,:)
     DOUBLE PRECISION, OPTIONAL :: sol(:,:)
     DOUBLE PRECISION, INTENT(in), OPTIONAL  :: rtol_in
     INTEGER, INTENT(in), OPTIONAL :: nitmax_in
     INTEGER, INTENT(out), OPTIONAL :: nits(:)
 !
     DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION
     INTEGER :: nitmax=PETSC_DEFAULT_INTEGER
     INTEGER :: j, nrhs
     INTEGER :: i, istart, iend, nrank_loc, nrank
     INTEGER :: npes, me, ierr
     INTEGER :: idx(mat%istart:mat%iend)
 !
     Vec :: vec_rhs, vec_sol
     PetscScalar :: scal
     PetscScalar, POINTER :: psol_loc(:)
     KSPConvergedReason :: reason
 !
     CALL mpi_comm_size(mat%comm, npes, ierr)
     CALL mpi_comm_rank(mat%comm, me, ierr)
 !
     nrhs = SIZE(rhs,2)
     istart = mat%istart
     iend = mat%iend
     nrank_loc = iend-istart+1
     nrank = mat%rank
     idx = (/ (i, i=istart,iend) /) - 1  ! 0-based petsc vector
 !
 !      Create Vectors
 !
     CALL VecCreate(mat%comm, vec_rhs, ierr)
     CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr)
     CALL VecSetFromOptions(vec_rhs, ierr)
     CALL VecDuplicate(vec_rhs, vec_sol, ierr)
 !
 !     Set solver
 !
     IF(PRESENT(rtol_in)) rtol = rtol_in
     IF(PRESENT(nitmax_in)) nitmax = nitmax_in
 !
     CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr)
     CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,&
          &  PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr)
     CALL KSPSetFromOptions(mat%SOLVER, ierr)
 !
 !     Set RHS
 !
     DO j=1,nrhs
        CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart,j), INSERT_VALUES, ierr)
        CALL VecAssemblyBegin(vec_rhs, ierr)
        CALL VecAssemblyEnd(vec_rhs, ierr)
 !
        CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr)
        CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr)
        IF(reason .LT. 0) THEN
           IF(me.EQ.0) THEN 
              WRITE(*,'(a,i4,a,i8)') 'BSOLVE: diverges with reason', reason, &
                   &  ' for j =', j
           END IF
        END IF
        IF(PRESENT(nits)) THEN
           CALL KSPGetIterationNumber(mat%SOLVER, nits(j), ierr)
        END IF
 !
 !     Get global solutions on all MPI processes
 !
        CALL VecGetArrayF90(vec_sol, psol_loc, ierr)
 !
        IF(PRESENT(sol)) THEN
           CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
                &      sol(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
                &      mat%comm, ierr)
        ELSE
           CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
                &      rhs(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
                &      mat%comm, ierr)
        END IF
 !
        CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr)
     END DO
 !
   END SUBROUTINE bsolve_petsc_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_petsc_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(petsc_mat)             :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr))
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_petsc_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(petsc_mat)             :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_petsc_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_petsc_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(petsc_mat) :: mat
     INTEGER :: ierr
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
     CALL MatDestroy(mat%AMAT,ierr)
   END SUBROUTINE destroy_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_petsc_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(petsc_mat) :: mata, matb
     INTEGER :: ierr
 !
 ! Assume that matb was already initialized by init_petsc_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE petsc_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     matb%rank =  mata%rank
     matb%nnz = mata%nnz
     matb%nnz_loc = mata%nnz_loc
     matb%istart = mata%istart
     matb%iend = mata%iend
     matb%nlsym = mata%nlsym
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)    
     END IF
 !
 !    Destroy existing PETSC matrix and recreate a new one from scratch
 !
     CALL MatDestroy(matb%AMAT, ierr)
     CALL MatConvert(mata%AMAT, MATSAME, MAT_INITIAL_MATRIX, matb%AMAT, ierr)
   END SUBROUTINE mcopy_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_petsc_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(petsc_mat) :: mata, matb
     DOUBLE PRECISION  :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_petsc_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 !
 END MODULE petsc_bsplines
diff --git a/src/psum_mat.tpl b/src/psum_mat.tpl
index bf44e63..c13fe33 100644
--- a/src/psum_mat.tpl
+++ b/src/psum_mat.tpl
@@ -1,101 +1,101 @@
 !>
 !> @file psum_mat.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
     INTEGER, INTENT(in) :: comm
 !
     INTEGER :: me, npes, ierr
     INTEGER :: n, r, i, base
     INTEGER :: newrank
 !---------------------------------------------------------------------
 !                1.0    Prologue
 !
     CALL mpi_comm_size(comm, npes, ierr)
     CALL mpi_comm_rank(comm, me, ierr)
 !
 !    Compute n and r defined by npes = 2**n+r
     i=1
     n=0
     DO WHILE (2*i.LE.npes)
        n=n+1
        i=2*i
     END DO ! i=2**n
     r = npes-i
 !---------------------------------------------------------------------
 !                2.0    Node partition
 !
 !  I: nodes with ranks < 2*r
 !      . nodes with even ranks receive from rank+1 and sum
 !      . odd ranks sends to rank-1
 ! II: nodes with ranks >= 2*r
 !      . do nothing
 !
   IF( me .LT. 2*r ) THEN
      IF( MODULO(me,2) .EQ. 0 ) THEN
         CALL p2p_mat(mat, me+1, 'recv', 'updt', comm)
      ELSE
         CALL p2p_mat(mat, me-1, 'send', 'updt', comm)
      END IF
   END IF
 !---------------------------------------------------------------------
 !                3.0    Binary tree reduction using new ranks
 !
 !   Define new ranks
   IF( MODULO(me,2).EQ.0 .AND. me.LT.2*r ) THEN  ! new rank in I
      newrank = me/2
   ELSE IF(  me.GE.2*r ) THEN ! new ranks in II
      newrank = me-r
   ELSE   ! inactive ranks in I          
      newrank = -1
   END IF
 !
 !   Reduction with 2**n (positive) newranks 
   IF( newrank .GE. 0 ) THEN   ! only for nodes with new rank > 0
      base = 1
      DO i=1,n
         CALL p2p_mat(mat, oldrank(IEOR(newrank,base)), &
              &       'sendrecv', 'updt', comm)
         base = base*2
      END DO
   END IF
 !---------------------------------------------------------------------
 !                4.0    Final exchanche in I
 !
   IF( me .LT. 2*r ) THEN
      IF( MODULO(me,2).EQ.0 ) THEN
         CALL p2p_mat(mat, me+1, 'send', 'put', comm)
      ELSE
         CALL p2p_mat(mat, me-1, 'recv', 'put', comm)
      END IF
   END IF
 !---------------------------------------------------------------------
 CONTAINS
   INTEGER FUNCTION oldrank(rank)
     INTEGER, INTENT(in) :: rank
     IF(rank.LT.r) THEN
        oldrank = 2*rank
     ELSE
        oldrank = rank+r
     END IF
   END FUNCTION oldrank
diff --git a/src/pwsmp_mod.f90 b/src/pwsmp_mod.f90
index b981cde..ee22c57 100644
--- a/src/pwsmp_mod.f90
+++ b/src/pwsmp_mod.f90
@@ -1,2032 +1,2032 @@
 !>
 !> @file pwsmp_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE pwsmp_bsplines
 !
 !    PWSMP_BSPLINES: Simple interface to the parallel sparse direct 
 !                    solver PWSMP.
 !
 !    T.M. Tran, CRPP-EPFL
 !    December 2011
 !
   USE sparse
   IMPLICIT NONE
 !
   INTEGER, SAVE :: current_matid = -1
   INTEGER, SAVE :: last_matid = -1
 !
   TYPE wsmp_param
      INTEGER          :: iparm(64)
      DOUBLE PRECISION :: dparm(64)
   END TYPE wsmp_param
 !
   TYPE wsmp_mat
      INTEGER :: matid=-1
      INTEGER :: rank=0, nnz
      INTEGER :: nterms, kmat, nrhs
      INTEGER :: comm
      INTEGER :: istart, iend, rank_loc
      INTEGER :: nnz_start, nnz_end, nnz_loc
      LOGICAL :: nlsym
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(spmat), POINTER      :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      INTEGER, POINTER          :: perm(:) => NULL()
      INTEGER, POINTER          :: invp(:) => NULL()
      INTEGER, POINTER          :: mrp(:) => NULL()
      DOUBLE PRECISION, POINTER :: diag(:) => NULL()
      DOUBLE PRECISION, POINTER :: val(:) => NULL()
      DOUBLE PRECISION, POINTER :: aux(:) => NULL()
      TYPE(wsmp_param)       :: p
   END TYPE wsmp_mat
 !
   TYPE zwsmp_mat
      INTEGER :: matid=-1
      INTEGER :: rank=0, nnz
      INTEGER :: nterms, kmat, nrhs
      INTEGER :: comm
      INTEGER :: istart, iend, rank_loc
      INTEGER :: nnz_start, nnz_end, nnz_loc
      LOGICAL :: nlherm
      LOGICAL :: nlsym
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(zspmat), POINTER     :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      INTEGER, POINTER          :: perm(:) => NULL()
      INTEGER, POINTER          :: invp(:) => NULL()
      INTEGER, POINTER          :: mrp(:) => NULL()
      DOUBLE COMPLEX, POINTER :: diag(:) => NULL()
      DOUBLE COMPLEX, POINTER :: val(:) => NULL()
      DOUBLE COMPLEX, POINTER :: aux(:) => NULL()
      TYPE(wsmp_param)       :: p
   END TYPE zwsmp_mat
 !
   INTERFACE init
      MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat
   END INTERFACE init
 !
   INTERFACE check_mat
      MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat
   END INTERFACE check_mat
 !
   INTERFACE clear_mat
      MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat
   END INTERFACE clear_mat
 !
   INTERFACE updtmat
      MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat
   END INTERFACE updtmat
 !
   INTERFACE putele
      MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat
   END INTERFACE putele
 !
   INTERFACE getele
      MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat
   END INTERFACE getele
 !
   INTERFACE putrow
      MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat
   END INTERFACE putrow
 !
   INTERFACE getrow
      MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat
   END INTERFACE getrow
 !
   INTERFACE putcol
      MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat
   END INTERFACE putcol
 !
   INTERFACE getcol
      MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat
   END INTERFACE getcol
 !
   INTERFACE get_count
      MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat
   END INTERFACE get_count
 !
   INTERFACE to_mat
      MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat
   END INTERFACE to_mat
 !
   INTERFACE reord_mat
      MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat
   END INTERFACE reord_mat
 !
   INTERFACE numfact
      MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat
   END INTERFACE numfact
 !
   INTERFACE factor
      MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat
   END INTERFACE factor
 !
   INTERFACE bsolve
      MODULE PROCEDURE bsolve_wsmp_mat1,  bsolve_wsmp_matn, &
           &           bsolve_zwsmp_mat1, bsolve_zwsmp_matn
   END INTERFACE bsolve
 !
   INTERFACE vmx
      MODULE PROCEDURE vmx_wsmp_mat,  vmx_wsmp_matn, &
           &           vmx_zwsmp_mat, vmx_zwsmp_matn
   END INTERFACE vmx
 !
   INTERFACE destroy
      MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat
  END INTERFACE destroy
 !
  INTERFACE putmat
     MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat
  END INTERFACE putmat
 !
  INTERFACE getmat
     MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat
  END INTERFACE getmat
 !
  INTERFACE mcopy
     MODULE PROCEDURE mcopy_wsmp_mat,  mcopy_zwsmp_mat
  END INTERFACE mcopy
 !
  INTERFACE maddto
     MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat
  END INTERFACE maddto
 !
  INTERFACE psum_mat
     MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat
  END INTERFACE psum_mat
 !
  INTERFACE p2p_mat
     MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat
  END INTERFACE p2p_mat
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, &
        &                   nlforce_zero, comm_in)
 !
 !   Initialize an empty sparse wsmp matrix
 !
     USE pputils2
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(wsmp_mat)                :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm_in
 !
     INTEGER :: comm, nloc
     INTEGER :: info
     INTEGER :: idummy = 0
     DOUBLE PRECISION :: dummy = 0.0d0
 !
     comm = MPI_COMM_WORLD
     IF(PRESENT(comm_in)) comm = comm_in
     mat%comm = comm
 !
 !  Store away (valid) current matrix id
 !
     IF(current_matid .GE. 0) THEN
        CALL wstoremat(current_matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
     last_matid = last_matid+1
     mat%matid = last_matid
     current_matid = mat%matid
 !
 !  Initialize sparse matrice structure
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlpos = .TRUE.
     mat%nrhs = 1
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
 !    Matrix partition
 !
     CALL dist1d(comm, 1, n, mat%istart, nloc)
     mat%iend = mat%istart + nloc - 1
     mat%rank_loc = nloc
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat, mat%istart, mat%iend)
 !
 !  Fill 'iparm' and 'dparm' with default values
 !
     mat%p%iparm(1:3) = 0
     IF(mat%nlsym) THEN
        CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
        IF(mat%nlpos) THEN
           mat%p%iparm(31) = 0
        ELSE
 !!$          mat%p%iparm(31) = 1  ! LDL^T without pivoting
           mat%p%iparm(31) = 2  ! LDL^T with pivoting
        END IF
        CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', &
             &             mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
     CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
 !
   CONTAINS
     SUBROUTINE setup_wsmp(iparm, dparm)
       INTEGER          :: iparm(:)
       DOUBLE PRECISION :: dparm(:)
     END SUBROUTINE setup_wsmp
   END SUBROUTINE init_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, &
        &                   nlpos, nlforce_zero, comm_in)
 !
 !   Initialize an empty sparse wsmp matrix
 !
     USE pputils2
     INCLUDE 'mpif.h'
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(zwsmp_mat)               :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlherm
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER, OPTIONAL, INTENT(in) :: comm_in
 !
     INTEGER :: comm, nloc
     INTEGER :: info
     INTEGER :: idummy = 0
     DOUBLE COMPLEX :: dummy = 0.0d0
 !
     comm = MPI_COMM_WORLD
     IF(PRESENT(comm_in)) comm = comm_in
     mat%comm = comm
 !
 !  Store away (valid) current matrix id
 !
     IF(current_matid .GE. 0) THEN
        CALL wstoremat(current_matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
     last_matid = last_matid+1
     mat%matid = last_matid
     current_matid = mat%matid
 !
 !  Initialize sparse matrice structure
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlherm = .FALSE.
     mat%nlpos = .TRUE.
     mat%nrhs = 1
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlherm)) mat%nlherm = nlherm
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
 !
 !    Matrix partition
 !
     CALL dist1d(comm, 1, n, mat%istart, nloc)
     mat%iend = mat%istart + nloc - 1
     mat%rank_loc = nloc
 !
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat, mat%istart, mat%iend)
 !
 !  Fill 'iparm' and 'dparm' with default values
 !
     mat%p%iparm(1:3) = 0
     IF(mat%nlherm .OR. mat%nlsym) THEN
        CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
        IF(mat%nlherm) THEN
           IF(mat%nlpos) THEN
              mat%p%iparm(31) = 0  ! hermitian, positive definite 
           ELSE
              mat%p%iparm(31) = 2  ! hermitian, no-definite, LDL^T with pivoting
           END IF
        ELSE
           mat%p%iparm(31) = 3     ! non-hermitian, symmetric
        END IF
     ELSE
        CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', &
             &             mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !!$    WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm
 !!$    WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm
 !
     CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
 !
   CONTAINS
     SUBROUTINE setup_wsmp(iparm, dparm)
       INTEGER          :: iparm(:)
       DOUBLE PRECISION :: dparm(:)
     END SUBROUTINE setup_wsmp
   END SUBROUTINE init_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE check_wsmp_mat(mat)
 !
 !  Check matrice id and  recall the matrice if not current
 !
     TYPE(wsmp_mat) :: mat
     INTEGER :: info
 !
     IF(.NOT.mat%nlsym) THEN
        IF( mat%matid.NE.current_matid ) THEN      
           WRITE(*,'(a)') "Processing multi matrices is not possible "// &
                &         "for non-symetric matrices."
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        ELSE
           RETURN
        END IF
     END IF
 !
     IF( mat%matid.NE.current_matid ) THEN
        IF(current_matid .GE. 0) THEN
           CALL wstoremat(current_matid, info)
           IF(info.NE.0) THEN
              WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, &
                   &               ' failed with error', info
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
        CALL wrecallmat(mat%matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, &
                &              ' failed with error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
        current_matid = mat%matid
     END IF
   END SUBROUTINE check_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE check_zwsmp_mat(mat)
 !
 !  Check matrice id and  recall the matrice if not current
 !
     TYPE(zwsmp_mat) :: mat
     INTEGER :: info
 !
     IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN
        IF( mat%matid.NE.current_matid ) THEN      
           WRITE(*,'(a)') "Processing multi matrices is not possible "// &
                &         "for non-symetric/non-hermitian matrices."
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        ELSE
           RETURN
        END IF
     END IF
 !
     IF( mat%matid.NE.current_matid ) THEN
        IF(current_matid .GE. 0) THEN
           CALL wstoremat(current_matid, info)
           IF(info.NE.0) THEN
              WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, &
                   &               ' failed with error', info
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
        CALL wrecallmat(mat%matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, &
                &              ' failed with error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
        current_matid = mat%matid
     END IF
   END SUBROUTINE check_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_wsmp_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(wsmp_mat) :: mat
 !
     mat%val = 0.0d0
   END SUBROUTINE clear_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_zwsmp_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(zwsmp_mat) :: mat
 !
     mat%val = (0.0d0, 0.0d0)
   END SUBROUTINE clear_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_wsmp_mat(mat, i, j, val)
 !
 !   Update element Aij of wsmp  matrix
 !
     TYPE(wsmp_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
        IF(i.GT.j) RETURN
     END IF
     IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
        WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
        WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
        STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zwsmp_mat(mat, i, j, val)
 !
 !   Update element Aij of wsmp  matrix
 !
     TYPE(zwsmp_mat)            :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
        IF(i.GT.j) RETURN
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           IF(mat%nlherm) THEN
              mat%val(s+k) =  mat%val(s+k)+CONJG(val)  ! CSR-UT* = CSC-LT
           ELSE
              mat%val(s+k) =  mat%val(s+k)+val
        END IF
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_wsmp_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(wsmp_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
        END IF
     END IF
 !
 !    Do nothing if outside
     IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1) - 1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  val
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
    END IF
   END SUBROUTINE putele_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zwsmp_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zwsmp_mat)            :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     DOUBLE COMPLEX :: valput
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     valput = val
     IF(mat%nlsym .OR. mat%nlherm) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
           IF(mat%nlherm) THEN
              valput = CONJG(val)
           ELSE
              valput = val
           END IF
        END IF
     END IF
 !
 !    Do nothing if outside
     IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, valput, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1) - 1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           IF(mat%nlherm) THEN
              mat%val(s+k) = CONJG(valput)   ! CSR-UT* = CSC-LT
           ELSE
              mat%val(s+k) =  valput
           END IF
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
    END IF
  END SUBROUTINE putele_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_wsmp_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(wsmp_mat)             :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     val = 0.0d0   ! Assume zero val if outside
     IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, val)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1) - 1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           val =mat%val(s+k) 
        ELSE
           val = 0.0d0   ! Assume zero val if not found
        END IF
     END IF
   END SUBROUTINE getele_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zwsmp_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(zwsmp_mat)             :: mat
     INTEGER, INTENT(in)         :: i, j
     DOUBLE COMPLEX, INTENT(out) :: val
     DOUBLE COMPLEX :: valget
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlherm .OR. mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     val = (0.0d0, 0.0d0)   ! Assume zero val if outside
     IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, valget)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1) - 1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           IF(mat%nlherm) THEN
              valget = CONJG(mat%val(s+k))   ! CSR-UT* = CSC-LT
           ELSE
              valget = mat%val(s+k) 
           END IF
        ELSE
           valget = (0.0d0,0.0d0)   ! Assume zero val if not found
        END IF
     END IF
     val = valget
     IF( i.GT.j ) THEN
        IF(mat%nlherm) THEN 
           val = CONJG(valget)
        END IF
     END IF
   END SUBROUTINE getele_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_wsmp_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: i
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zwsmp_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE COMPLEX, INTENT(in)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_wsmp_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(wsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zwsmp_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)          :: i
     DOUBLE COMPLEX, INTENT(out)  :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_wsmp_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_zwsmp_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE COMPLEX, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_wsmp_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(wsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_zwsmp_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)          :: j
     DOUBLE COMPLEX, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     DO i=amat%istart,amat%iend
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_wsmp_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(wsmp_mat)                 :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_wsmp_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_wsmp_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=mat%istart,mat%iend
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(zwsmp_mat)                :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_zwsmp_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_zwsmp_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=mat%istart,mat%iend
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_wsmp_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to wsmp matrice structure
 !
     INCLUDE 'mpif.h'
     TYPE(wsmp_mat) :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     INTEGER :: comm, ierr, nnz_loc, rank_loc
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
     comm = mat%comm
 !
 !    Allocate the WSMP matrix structure
 !
     rank = mat%rank
     rank_loc = mat%rank
 !
     nnz_loc = get_count(mat)
     mat%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat%nnz_start = mat%nnz_start + 1
     mat%nnz_end = mat%nnz_start + nnz_loc - 1
     mat%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
 !    Allocate LOCAL irow, cols and val
     IF(mat%nlsym) THEN
        ALLOCATE(mat%perm(rank))
        ALLOCATE(mat%invp(rank))
     END IF
     ALLOCATE(mat%val(nnz_loc))
     ALLOCATE(mat%cols(nnz_loc))
     ALLOCATE(mat%irow(mat%istart:mat%iend+1))
 !
 !    Fill WSMP structure and deallocate the sparse rows
 !
     mat%irow(mat%istart) = 1
     DO i=mat%istart,mat%iend
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)+1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_zwsmp_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to wsmp matrice structure
 !
     INCLUDE 'mpif.h'
     TYPE(zwsmp_mat) :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     INTEGER :: comm, ierr, nnz_loc, rank_loc
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
     comm = mat%comm
 !
 !    Allocate the WSMP matrix structure
 !
     rank = mat%rank
     rank_loc = mat%rank
 !
     nnz_loc = get_count(mat)
     mat%nnz_start = 0
     CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
     mat%nnz_start = mat%nnz_start + 1
     mat%nnz_end = mat%nnz_start + nnz_loc - 1
     mat%nnz_loc = nnz_loc
     CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
 !
 !    Allocate LOCAL irow, cols and val
     IF(mat%nlsym) THEN
        ALLOCATE(mat%perm(rank))
        ALLOCATE(mat%invp(rank))
     END IF
     ALLOCATE(mat%val(nnz_loc))
     ALLOCATE(mat%cols(nnz_loc))
     ALLOCATE(mat%irow(mat%istart:mat%iend+1))
 !
 !    Fill WSMP structure and deallocate the sparse rows
 !
     mat%irow(mat%istart) = 1
     DO i=mat%istart,mat%iend
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)+1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(mat%nlherm) THEN
        mat%val(:) = CONJG(mat%val(:))  ! CSR-UT* = CSC-LT
     END IF
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_wsmp_mat(mat)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(wsmp_mat) :: mat
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 1  ! Ordering
        mat%p%iparm(3) = 2  ! Symbolic factorization
        CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        mat%p%iparm(2) = 1  ! Analysis and reordering
        mat%p%iparm(3) = 1
        CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE reord_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_zwsmp_mat(mat)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(zwsmp_mat)             :: mat
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        mat%p%iparm(2) = 1  ! Ordering
        mat%p%iparm(3) = 2  ! Symbolic factorization
        CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
 !!$       WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm
     ELSE
        mat%p%iparm(2) = 1  ! Analysis and reordering
        mat%p%iparm(3) = 1
        CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE reord_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_wsmp_mat(mat)
 !
 !   Numerical factorization
 !
     TYPE(wsmp_mat) :: mat
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 3   ! Numerical factorization
        mat%p%iparm(3) = 3
        CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        mat%p%iparm(2) = 2  ! Factorization
        mat%p%iparm(3) = 2
        CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE numfact_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_zwsmp_mat(mat)
 !
 !   Numerical factorization
 !
     TYPE(zwsmp_mat) :: mat
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        mat%p%iparm(2) = 3   ! Numerical factorization
        mat%p%iparm(3) = 3
        CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        mat%p%iparm(2) = 2  ! Factorization
        mat%p%iparm(3) = 2
        CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE numfact_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_wsmp_mat(mat, nlreord)
 !
 !  Factor (create  +reorder + factor) a wsmp_mat matrix
 !
     TYPE(wsmp_mat)                :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat)
   END SUBROUTINE factor_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_zwsmp_mat(mat, nlreord)
 !
 !  Factor (create  +reorder + factor) a wsmp_mat matrix
 !
     TYPE(zwsmp_mat)               :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat)
   END SUBROUTINE factor_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp
 !
     INCLUDE 'mpif.h'
     TYPE(wsmp_mat)             :: mat
     DOUBLE PRECISION           :: rhs(:)
     DOUBLE PRECISION, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL          :: nref
 !
     DOUBLE PRECISION :: sol_loc(mat%rank_loc)
     INTEGER :: nloc, me, nprocs, ierr, i
     INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = 1
 !
 !  Extract local rhs from global rhs
 !
     sol_loc = rhs(mat%istart:mat%iend)
 !
     IF(mat%nlsym) THEN
        CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
             &     mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
 !
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
 !   Allgatherv local sol
 !
     CALL mpi_comm_rank(mat%comm, me, ierr)
     CALL mpi_comm_size(mat%comm, nprocs, ierr)
 !
     ALLOCATE(displs(0:nprocs))
     ALLOCATE(nlocs(0:nprocs-1))
 !
     nloc = mat%rank_loc
     CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
          &   mat%comm, ierr)
 !
     displs(0) = 0
     DO i=0,nprocs-1
        displs(i+1) = displs(i)+nlocs(i)
     END DO
 !
     IF(PRESENT(sol)) THEN
        CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, &
             &   sol, nlocs, displs, MPI_DOUBLE_PRECISION, &
             &   mat%comm, ierr)
     ELSE
        CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, &
             &   rhs, nlocs, displs, MPI_DOUBLE_PRECISION, &
             &   mat%comm, ierr)
     END IF
 !
     DEALLOCATE(nlocs)
     DEALLOCATE(displs)
 !
    END SUBROUTINE bsolve_wsmp_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp
 !
     INCLUDE 'mpif.h'
     TYPE(zwsmp_mat)          :: mat
     DOUBLE COMPLEX           :: rhs(:)
     DOUBLE COMPLEX, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL        :: nref
 !
     DOUBLE COMPLEX :: sol_loc(mat%rank_loc)
     INTEGER :: nloc, me, nprocs, ierr, i
     INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym .OR. mat%nlherm) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = 1
 !
 !  Extract local rhs from global rhs
 !
     sol_loc = rhs(mat%istart:mat%iend)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
             &      mat%rank_loc,  mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
 !
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
 !   Allgatherv local sol
 !
     CALL mpi_comm_rank(mat%comm, me, ierr)
     CALL mpi_comm_size(mat%comm, nprocs, ierr)
 !
     ALLOCATE(displs(0:nprocs))
     ALLOCATE(nlocs(0:nprocs-1))
 !
     nloc = mat%rank_loc
     CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
          &   mat%comm, ierr)
 !
     displs(0) = 0
     DO i=0,nprocs-1
        displs(i+1) = displs(i)+nlocs(i)
     END DO
 !
     IF(PRESENT(sol)) THEN
        CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, &
             &   sol, nlocs, displs, MPI_DOUBLE_COMPLEX, &
             &   mat%comm, ierr)
     ELSE
        CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, &
             &   rhs, nlocs, displs, MPI_DOUBLE_COMPLEX, &
             &   mat%comm, ierr)
     END IF
 !
     DEALLOCATE(nlocs)
     DEALLOCATE(displs)
 !
   END SUBROUTINE bsolve_zwsmp_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp, multiple RHS
 !
     INCLUDE 'mpif.h'
     TYPE(wsmp_mat)             :: mat
     DOUBLE PRECISION           :: rhs(:,:)
     DOUBLE PRECISION, OPTIONAL :: sol(:,:)
     INTEGER, OPTIONAL          :: nref
 !
     DOUBLE PRECISION :: sol_loc(mat%rank_loc,SIZE(rhs,2))
     INTEGER :: nloc, me, nprocs, ierr, i
     INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = SIZE(rhs,2)
 !
 !  Extract local rhs from global rhs
 !
     sol_loc(:,:) = rhs(mat%istart:mat%iend,:)
 !
     IF(mat%nlsym) THEN
        CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
             &     mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
 !
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
 !   Allgatherv local sol
 !
     CALL mpi_comm_rank(mat%comm, me, ierr)
     CALL mpi_comm_size(mat%comm, nprocs, ierr)
 !
     ALLOCATE(displs(0:nprocs))
     ALLOCATE(nlocs(0:nprocs-1))
 !
     nloc = mat%rank_loc
     CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
          &   mat%comm, ierr)
 !
     displs(0) = 0
     DO i=0,nprocs-1
        displs(i+1) = displs(i)+nlocs(i)
     END DO
 !
     DO i=1,mat%nrhs
        IF(PRESENT(sol)) THEN
           CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, &
                &   sol(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, &
                &   mat%comm, ierr)
        ELSE
           CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, &
                &   rhs(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, &
                &   mat%comm, ierr)
        END IF
     END DO
 !
     DEALLOCATE(nlocs)
     DEALLOCATE(displs)
 !
   END SUBROUTINE bsolve_wsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp, multiple RHS
 !
     INCLUDE 'mpif.h'
     TYPE(zwsmp_mat)          :: mat
     DOUBLE COMPLEX           :: rhs(:,:)
     DOUBLE COMPLEX, OPTIONAL :: sol(:,:)
     INTEGER, OPTIONAL        :: nref
 !
     DOUBLE COMPLEX :: sol_loc(mat%rank_loc,SIZE(rhs,2))
     INTEGER :: nloc, me, nprocs, ierr, i
     INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .or. mat%nlherm) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym .OR. mat%nlherm) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = SIZE(rhs,2)
 !
 !  Extract local rhs from global rhs
 !
     sol_loc(:,:) = rhs(mat%istart:mat%iend,:)
 !
     IF(mat%nlsym) THEN
        CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
             &     mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
 !
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
 !   Allgatherv local sol
 !
     CALL mpi_comm_rank(mat%comm, me, ierr)
     CALL mpi_comm_size(mat%comm, nprocs, ierr)
 !
     ALLOCATE(displs(0:nprocs))
     ALLOCATE(nlocs(0:nprocs-1))
 !
     nloc = mat%rank_loc
     CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
          &   mat%comm, ierr)
 !
     displs(0) = 0
     DO i=0,nprocs-1
        displs(i+1) = displs(i)+nlocs(i)
     END DO
 !
     DO i=1,mat%nrhs
        IF(PRESENT(sol)) THEN
           CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, &
                &   sol(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, &
                &   mat%comm, ierr)
        ELSE
           CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, &
                &   rhs(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, &
                &   mat%comm, ierr)
        END IF
     END DO
 !
     DEALLOCATE(nlocs)
     DEALLOCATE(displs)
 !
   END SUBROUTINE bsolve_zwsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(wsmp_mat)                :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr))
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zwsmp_mat)             :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr))
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, i, j
     CHARACTER(len=6) :: matdescra
     CHARACTER(len=1) :: transa
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
     transa='N'
     IF(mat%nlherm) THEN
        transa='T'   ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
     END IF
     CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j))
           END DO
        ELSE
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
           END DO
        END IF
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(wsmp_mat)                :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_wsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zwsmp_mat)             :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
     CHARACTER(len=1) :: transa
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
     transa='N'
     IF(mat%nlherm) THEN
        transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
     END IF
 !
     CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
          &          beta, yarr, n)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:)
           END DO
        ELSE
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
           END DO
        END IF
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &               + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zwsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_wsmp_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(wsmp_mat) :: mat
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
 !  Release memory for factors for symmetric matrix
     IF(mat%nlsym) THEN
        CALL check_mat(mat)
        CALL wsffree
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
     IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
   END SUBROUTINE destroy_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zwsmp_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(zwsmp_mat) :: mat
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
 !  Release memory for factors for symmetric/hermitian matrix
     IF(mat%nlsym .OR. mat%nlherm) THEN
        CALL check_mat(mat)
        CALL wsffree
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
     IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
   END SUBROUTINE destroy_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_wsmp_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(wsmp_mat)                      :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL creatg(fid, TRIM(label)//'/p')
     CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE put_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_zwsmp_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zwsmp_mat)                        :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL attach(fid, label, 'NLHERM', mat%nlherm)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL creatg(fid, TRIM(label)//'/p')
     CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE put_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_wsmp_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)          :: fid
     CHARACTER(len=*), INTENT(in) :: label
     TYPE(wsmp_mat)               :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getatt(fid, label, 'NLPOS', mat%nlpos)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     IF(mat%nlsym) THEN
        CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
        CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
     END IF
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE get_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_zwsmp_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)          :: fid
     CHARACTER(len=*), INTENT(in) :: label
     TYPE(zwsmp_mat)              :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getatt(fid, label, 'NLPOS', mat%nlpos)
     CALL getatt(fid, label, 'NLHERM', mat%nlherm)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     IF(mat%nlsym) THEN
        CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
        CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
     END IF
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE get_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_wsmp_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(wsmp_mat) :: mata, matb
     INTEGER :: n, nnz
 !
 ! Assume that matb was already initialized by init_wsmp_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     matb%rank = n
     matb%nnz = nnz
     matb%nlsym = mata%nlsym
     matb%nlpos = mata%nlpos
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
     IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
     ALLOCATE(matb%val(nnz)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
     ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
     ALLOCATE(matb%perm(n))
     IF(matb%nlsym) THEN
        ALLOCATE(matb%perm(n))
        ALLOCATE(matb%invp(n))
     END IF
   END SUBROUTINE mcopy_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_zwsmp_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(zwsmp_mat) :: mata, matb
     INTEGER :: n, nnz
 !
 ! Assume that matb was already initialized by init_wsmp_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     matb%rank = n
     matb%nnz = nnz
     matb%nlsym = mata%nlsym
     matb%nlherm = mata%nlherm
     matb%nlpos = mata%nlpos
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
     IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
     ALLOCATE(matb%val(nnz)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
     ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
     ALLOCATE(matb%perm(n))
     IF(matb%nlsym) THEN
        ALLOCATE(matb%perm(n))
        ALLOCATE(matb%invp(n))
     END IF
   END SUBROUTINE mcopy_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_wsmp_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(wsmp_mat)   :: mata, matb
     DOUBLE PRECISION :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zwsmp_mat) :: mata, matb
     DOUBLE COMPLEX  :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_wsmp_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(wsmp_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_zwsmp_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(zwsmp_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(wsmp_mat)            :: mat
     DOUBLE PRECISION, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(zwsmp_mat)           :: mat
     DOUBLE COMPLEX, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pwsmp_bsplines
diff --git a/src/sparse_mod.f90 b/src/sparse_mod.f90
index 5bc289e..2b0dd78 100644
--- a/src/sparse_mod.f90
+++ b/src/sparse_mod.f90
@@ -1,899 +1,899 @@
 !>
 !> @file sparse_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE sparse
 !
 !    SPARSE: Implement sparse matrix using dynamic linked lists
 !            as matrix rows.
 !
 !    T.M. Tran, CRPP-EPFL
 !    October 2010
 !
   IMPLICIT NONE
 !
   TYPE elt
      INTEGER            :: index=0
      DOUBLE PRECISION   :: val=0.0d0
      TYPE(elt), POINTER :: next => NULL()
   END TYPE elt
 !
   TYPE zelt
      INTEGER             :: index=0
      DOUBLE COMPLEX      :: val=(0.0d0, 0.0d0)
      TYPE(zelt), POINTER :: next => NULL()
   END TYPE zelt
 !
   TYPE sprow
      INTEGER            :: nnz=0           ! Number of non zeros in a row
      TYPE(elt), POINTER :: row0 => NULL()  ! Points to head of a (sparse) row
   END TYPE sprow
 !
   TYPE zsprow
      INTEGER             :: nnz=0          ! Number of non zeros in a row
      TYPE(zelt), POINTER :: row0 => NULL() ! Points to head of a (sparse) row
   END TYPE zsprow
 !
   TYPE spmat
      INTEGER :: rank
      TYPE(sprow), POINTER :: row(:) => NULL()
   END TYPE spmat
 !
   TYPE zspmat
      INTEGER :: rank
      TYPE(zsprow), POINTER :: row(:) => NULL()
   END TYPE zspmat
 !
   INTERFACE init
      MODULE PROCEDURE init_spmat, init_zspmat
  END INTERFACE init
 !
   INTERFACE updtmat
      MODULE PROCEDURE updt_sp, updt_zsp, updt_spmat, updt_zspmat
   END INTERFACE updtmat
 !
   INTERFACE putele
      MODULE PROCEDURE putele_sp, putele_zsp, putele_spmat, putele_zspmat
   END INTERFACE putele
 !
   INTERFACE getele
      MODULE PROCEDURE getele_sp, getele_zsp, getele_spmat, getele_zspmat
   END INTERFACE getele
 !
   INTERFACE putrow
      MODULE PROCEDURE putrow_csr,  putrow_full,  putrow_spmat, &
           &           putrow_zcsr, putrow_zfull, putrow_zspmat
   END INTERFACE putrow
 !
   INTERFACE getrow
      MODULE PROCEDURE getrow_csr,  getrow_full,  getrow_spmat, &
           &           getrow_zcsr, getrow_zfull, getrow_zspmat
   END INTERFACE getrow
 !
   INTERFACE putcol
      MODULE PROCEDURE putcol_spmat, putcol_zspmat
   END INTERFACE putcol
 !
   INTERFACE getcol
      MODULE PROCEDURE getcol_spmat, getcol_zspmat
   END INTERFACE getcol
 !
   INTERFACE get_count
      MODULE PROCEDURE get_count_sp,  get_count_spmat, &
           &           get_count_zsp, get_count_zspmat
   END INTERFACE get_count
 !
   INTERFACE destroy
      MODULE PROCEDURE destroy_spmat,  destroy_row,  destroy_node, &
           &           destroy_zspmat, destroy_zrow, destroy_znode
  END INTERFACE destroy
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_spmat(n, mat, istart, iend)
 !
 !   Initial an empty  sparse matrix
 !
     INTEGER, INTENT(in)           :: n
     INTEGER, INTENT(in), OPTIONAL :: istart, iend
     TYPE(spmat)                   :: mat
 !
     mat%rank = n
     IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row)
     IF(PRESENT(istart)) THEN
        ALLOCATE(mat%row(istart:iend))
     ELSE
        ALLOCATE(mat%row(n))
     END IF
 !
   END SUBROUTINE init_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_zspmat(n, mat, istart, iend)
 !
 !   Initial an empty  sparse matrix
 !
     INTEGER, INTENT(in)           :: n
     INTEGER, INTENT(in), OPTIONAL :: istart, iend
     TYPE(zspmat)                  :: mat
 !
     mat%rank = n
     IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row)
     IF(PRESENT(istart)) THEN
        ALLOCATE(mat%row(istart:iend))
     ELSE
        ALLOCATE(mat%row(n))
     END IF
 !
   END SUBROUTINE init_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_sp(arow, j, val)
 !
 !   Update element j of row arow or insert it in an increasing "index"
 !
     TYPE(sprow), TARGET          :: arow
     INTEGER, INTENT(in)          :: j
     DOUBLE PRECISION, INTENT(in) :: val
 !
     TYPE(elt), TARGET :: pre_root
     TYPE(elt), POINTER :: t, p
 !
     pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
     t => pre_root
     DO WHILE( ASSOCIATED(t%next) )
        p => t%next
        IF( p%index .EQ. j ) THEN
           p%val = p%val+val
           RETURN
        END IF
        IF( p%index .GT. j ) EXIT
        t => t%next
     END DO
     ALLOCATE(p)
     p = elt(j, val, t%next)
     t%next => p
 !
     arow%nnz = arow%nnz+1
     arow%row0 => pre_root%next ! In case the head is altered
   END SUBROUTINE updt_sp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zsp(arow, j, val)
 !
 !   Update element j of row arow or insert it in an increasing "index"
 !
     TYPE(zsprow), TARGET       :: arow
     INTEGER, INTENT(in)        :: j
     DOUBLE COMPLEX, INTENT(in) :: val
 !
     TYPE(zelt), TARGET :: pre_root
     TYPE(zelt), POINTER :: t, p
 !
     pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
     t => pre_root
     DO WHILE( ASSOCIATED(t%next) )
        p => t%next
        IF( p%index .EQ. j ) THEN
           p%val = p%val+val
           RETURN
        END IF
        IF( p%index .GT. j ) EXIT
        t => t%next
     END DO
     ALLOCATE(p)
     p = zelt(j, val, t%next)
     t%next => p
 !
     arow%nnz = arow%nnz+1
     arow%row0 => pre_root%next ! In case the head is altered
   END SUBROUTINE updt_zsp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_spmat(mat, i, j, val)
 !
 !   Update element Aij of sparse matrix
 !
     TYPE(spmat)                  :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
 !
     CALL updt_sp(mat%row(i), j, val)
   END SUBROUTINE updt_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zspmat(mat, i, j, val)
 !
 !   Update element Aij of sparse matrix
 !
     TYPE(zspmat)               :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
 !
     CALL updt_zsp(mat%row(i), j, val)
   END SUBROUTINE updt_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_sp(arow, j, val, found)
 !
 !   Get element j from row arow
 !
     TYPE(sprow), TARGET            :: arow
     INTEGER, INTENT(in)            :: j
     DOUBLE PRECISION, INTENT(out)  :: val
     LOGICAL, INTENT(out), OPTIONAL :: found
 !
     TYPE(elt), POINTER :: t
     INTEGER :: i
 !
     val = 0.0d0
     t => arow%row0  ! Start of a row
     DO WHILE( ASSOCIATED(t) )
        IF(t%index .EQ. j) THEN
           val = t%val
           IF(PRESENT(found)) found = .TRUE.
           RETURN
        END IF
        t => t%next
     END DO
     IF(PRESENT(found)) found = .FALSE.
   END SUBROUTINE getele_sp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zsp(arow, j, val, found)
 !
 !   Get element j from row arow
 !
     TYPE(zsprow), TARGET           :: arow
     INTEGER, INTENT(in)            :: j
     DOUBLE COMPLEX, INTENT(out)    :: val
     LOGICAL, INTENT(out), OPTIONAL :: found
 !
     TYPE(zelt), POINTER :: t
     INTEGER :: i
 !
     val = 0.0d0
     t => arow%row0  ! Start of a row
     DO WHILE( ASSOCIATED(t) )
        IF(t%index .EQ. j) THEN
           val = t%val
           IF(PRESENT(found)) found = .TRUE.
           RETURN
        END IF
        t => t%next
     END DO
     IF(PRESENT(found)) found = .FALSE.
   END SUBROUTINE getele_zsp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_spmat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(spmat)                   :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
 !
     CALL getele(mat%row(i), j, val)
   END SUBROUTINE getele_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zspmat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(zspmat)                :: mat
     INTEGER, INTENT(in)         :: i, j
     DOUBLE COMPLEX, INTENT(out) :: val
 !
     CALL getele(mat%row(i), j, val)
   END SUBROUTINE getele_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_sp(arow, j, val, nlforce_zero)
 !
 !   Put (overwrite) element j of row arow or insert it in an increasing "index"
 !
     TYPE(sprow), TARGET           :: arow
     INTEGER, INTENT(in)           :: j
     DOUBLE PRECISION, INTENT(in)  :: val
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     TYPE(elt), TARGET :: pre_root
     TYPE(elt), POINTER :: t, p
     LOGICAL :: rmnode
 !
     pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
     t => pre_root
 !
 !    Remove node which has zero val or not?
 !    But never create new node with zero val
 !
     rmnode = .TRUE.
     IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero
 !
     DO WHILE( ASSOCIATED(t%next) )
        p => t%next
        IF( p%index .EQ. j ) THEN
           IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val!
              t%next => p%next
              arow%nnz = arow%nnz-1
              arow%row0 => pre_root%next ! In case the head is altered
              DEALLOCATE(p)
           ELSE
              p%val = val
           END IF
           RETURN
        END IF
        IF( p%index .GT. j ) EXIT
        t => t%next
     END DO
 !
 !    Never create new node with zero val
 !
     IF(ABS(val).GT.EPSILON(0.0d0)) THEN
        ALLOCATE(p)
        p = elt(j, val, t%next)
        t%next => p
        arow%nnz = arow%nnz+1
        arow%row0 => pre_root%next
     END IF
   END SUBROUTINE putele_sp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zsp(arow, j, val, nlforce_zero)
 !
 !   Put (overwrite) element j of row arow or insert it in an increasing "index"
 !
     TYPE(zsprow), TARGET       :: arow
     INTEGER, INTENT(in)        :: j
     DOUBLE COMPLEX, INTENT(in) :: val
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     TYPE(zelt), TARGET :: pre_root
     TYPE(zelt), POINTER :: t, p
     LOGICAL :: rmnode
 !
     pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
     t => pre_root
 !
 !    Remove node which has zero val or not?
 !    But never create new node with zero val
 !
     rmnode = .TRUE.
     IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero
 !
     DO WHILE( ASSOCIATED(t%next) )
        p => t%next
        IF( p%index .EQ. j ) THEN
           IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val!
              t%next => p%next
              arow%nnz = arow%nnz-1
              arow%row0 => pre_root%next ! In case the head is altered
              DEALLOCATE(p)
           ELSE
              p%val = val
           END IF
           RETURN
        END IF
        IF( p%index .GT. j ) EXIT
        t => t%next
     END DO
 !
 !    Never create new node with zero val
 !
     IF(ABS(val).GT.EPSILON(0.0d0)) THEN
        ALLOCATE(p)
        p = zelt(j, val, t%next)
        t%next => p
        arow%nnz = arow%nnz+1
        arow%row0 => pre_root%next
     END IF
   END SUBROUTINE putele_zsp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_spmat(mat, i, j, val, nlforce_zero)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(spmat)                   :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(in)  :: val
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     CALL putele(mat%row(i), j, val, nlforce_zero)
   END SUBROUTINE putele_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zspmat(mat, i, j, val, nlforce_zero)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zspmat)                  :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE COMPLEX, INTENT(in)    :: val
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     CALL putele(mat%row(i), j, val, nlforce_zero)
   END SUBROUTINE putele_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_sp(arow)
 !
 !  Number of elements in arow
 !
     TYPE(sprow), INTENT(in) :: arow
     TYPE(elt), POINTER :: t
     INTEGER :: i
 !
     t => arow%row0  ! Start of a row
     i = 0
     DO WHILE( ASSOCIATED(t) )
        i=i+1
        t => t%next
     END DO
     get_count_sp = i
   END FUNCTION get_count_sp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_zsp(arow)
 !
 !  Number of elements in arow
 !
     TYPE(zsprow), INTENT(in) :: arow
     TYPE(zelt), POINTER :: t
     INTEGER :: i
 !
     t => arow%row0  ! Start of a row
     i = 0
     DO WHILE( ASSOCIATED(t) )
        i=i+1
        t => t%next
     END DO
     get_count_zsp = i
   END FUNCTION get_count_zsp
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_spmat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(spmat)                    :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
 !
     INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1))
     DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
        c(i) = get_count_sp(mat%row(i))
     END DO
     IF(PRESENT(nnz)) nnz = c
     get_count_spmat = SUM(c)
   END FUNCTION get_count_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_zspmat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(zspmat)                   :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
 !
     INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1))
     DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
        c(i) = get_count_zsp(mat%row(i))
     END DO
     IF(PRESENT(nnz)) nnz = c
     get_count_zspmat = SUM(c)
   END FUNCTION get_count_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_csr(arow, arr, col, count)
 !
 !   Get a row from sparse row arow and put it in a CSR format
 !
     TYPE(sprow), INTENT(in)        :: arow
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER, INTENT(out)           :: col(:)
     INTEGER, OPTIONAL, INTENT(out) :: count
 !
     TYPE(elt), POINTER :: t
     INTEGER :: i
 !
     t => arow%row0  ! Start of a row
     i = 0
     DO WHILE( ASSOCIATED(t) )
        i=i+1
        col(i) = t%index
        arr(i) = t%val
        t => t%next
     END DO
     IF(PRESENT(count)) count = i
   END SUBROUTINE getrow_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zcsr(arow, arr, col, count)
 !
 !   Get a row from sparse row arow and put it in a CSR format
 !
     TYPE(zsprow), INTENT(in)       :: arow
     DOUBLE COMPLEX, INTENT(out)    :: arr(:)
     INTEGER, INTENT(out)           :: col(:)
     INTEGER, OPTIONAL, INTENT(out) :: count
 !
     TYPE(zelt), POINTER :: t
     INTEGER :: i
 !
     t => arow%row0  ! Start of a row
     i = 0
     DO WHILE( ASSOCIATED(t) )
        i=i+1
        col(i) = t%index
        arr(i) = t%val
        t => t%next
     END DO
     IF(PRESENT(count)) count = i
   END SUBROUTINE getrow_zcsr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_full(arow, arr, count)
 !
 !   Get a row from sparse row arow and put it in an full row
 !
     TYPE(sprow), INTENT(in)        :: arow
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER, OPTIONAL, INTENT(out) :: count
 !
     TYPE(elt), POINTER :: t
     INTEGER :: n, i, j
 !
     n = SIZE(arr)
     arr = 0.0d0
     t => arow%row0  ! Start of a row
     i = 0
     DO WHILE( ASSOCIATED(t) )
        i=i+1
        j = t%index
        IF(j.LE.n) THEN
           arr(j) = t%val
           t => t%next
        ELSE
           WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!'
           STOP
        END IF
     END DO
     IF(PRESENT(count)) count = i
   END SUBROUTINE getrow_full
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zfull(arow, arr, count)
 !
 !   Get a row from sparse row arow and put it in an full row
 !
     TYPE(zsprow), INTENT(in)       :: arow
     DOUBLE COMPLEX, INTENT(out)    :: arr(:)
     INTEGER, OPTIONAL, INTENT(out) :: count
 !
     TYPE(zelt), POINTER :: t
     INTEGER :: n, i, j
 !
     n = SIZE(arr)
     arr = 0.0d0
     t => arow%row0  ! Start of a row
     i = 0
     DO WHILE( ASSOCIATED(t) )
        i=i+1
        j = t%index
        IF(j.LE.n) THEN
           arr(j) = t%val
           t => t%next
        ELSE
           WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!'
           STOP
        END IF
     END DO
     IF(PRESENT(count)) count = i
   END SUBROUTINE getrow_zfull
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_spmat(mat, i, arr, col)
 !
 !   Get a row from sparse matrix
 !
     TYPE(spmat), INTENT(in)        :: mat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER, INTENT(out), OPTIONAL :: col(:)
 !
     IF(PRESENT(col)) THEN ! The output row is defined by (col, arr)
        CALL getrow_csr(mat%row(i), arr, col)
     ELSE
        CALL getrow_full(mat%row(i), arr)
     END IF
   END SUBROUTINE getrow_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zspmat(mat, i, arr, col)
 !
 !   Get a row from sparse matrix
 !
     TYPE(zspmat), INTENT(in)       :: mat
     INTEGER, INTENT(in)            :: i
     DOUBLE COMPLEX, INTENT(out)    :: arr(:)
     INTEGER, INTENT(out), OPTIONAL :: col(:)
 !
     IF(PRESENT(col)) THEN ! The output row is defined by (col, arr)
        CALL getrow_zcsr(mat%row(i), arr, col)
     ELSE
        CALL getrow_zfull(mat%row(i), arr)
     END IF
   END SUBROUTINE getrow_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_csr(arow, arr, col, nlforce_zero)
 !
 !   Put a row from sparse row arow and put it in a CSR format
 !
     TYPE(sprow), INTENT(inout)    :: arow
     DOUBLE PRECISION, INTENT(in)  :: arr(:)
     INTEGER, INTENT(in)           :: col(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     INTEGER :: n, i
 !
     n=SIZE(arr)
     DO i=1,n
        CALL putele(arow, col(i), arr(i), nlforce_zero)
     END DO
   END SUBROUTINE putrow_csr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zcsr(arow, arr, col, nlforce_zero)
 !
 !   Put a row from sparse row arow and put it in a CSR format
 !
     TYPE(zsprow), INTENT(inout)   :: arow
     DOUBLE COMPLEX, INTENT(in)    :: arr(:)
     INTEGER, INTENT(in)           :: col(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     INTEGER :: n, i
 !
     n=SIZE(arr)
     DO i=1,n
        CALL putele(arow, col(i), arr(i), nlforce_zero)
     END DO
   END SUBROUTINE putrow_zcsr
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_full(arow, arr, nlforce_zero)
 !
 !   Put a row from sparse row arow and put it in a full row
 !
     TYPE(sprow), INTENT(inout)    :: arow
     DOUBLE PRECISION, INTENT(in)  :: arr(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     INTEGER :: n, i
 !
     n=SIZE(arr)
     DO i=1,n
        CALL putele(arow, i, arr(i), nlforce_zero)
     END DO
   END SUBROUTINE putrow_full
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zfull(arow, arr, nlforce_zero)
 !
 !   Put a row from sparse row arow and put it in a full row
 !
     TYPE(zsprow), INTENT(inout)    :: arow
     DOUBLE COMPLEX, INTENT(in)  :: arr(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     INTEGER :: n, i
 !
     n=SIZE(arr)
     DO i=1,n
        CALL putele(arow, i, arr(i), nlforce_zero)
     END DO
   END SUBROUTINE putrow_zfull
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_spmat(mat, i, arr, col, nlforce_zero)
 !
 !   Put a row to matrix
 !
     TYPE(spmat)                   :: mat
     INTEGER, intent(in)           :: i
     DOUBLE PRECISION, INTENT(in)  :: arr(:)
     INTEGER, INTENT(in), OPTIONAL :: col(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     IF(PRESENT(col)) THEN ! The input row is defined by (col, arr)
        CALL putrow_csr(mat%row(i), arr, col, nlforce_zero)
     ELSE
        CALL putrow_full(mat%row(i), arr, nlforce_zero)
     END IF
   END SUBROUTINE putrow_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zspmat(mat, i, arr, col, nlforce_zero)
 !
 !   Put a row to matrix
 !
     TYPE(zspmat)                  :: mat
     INTEGER, intent(in)           :: i
     DOUBLE COMPLEX, INTENT(in)    :: arr(:)
     INTEGER, INTENT(in), OPTIONAL :: col(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     IF(PRESENT(col)) THEN ! The input row is defined by (col, arr)
        CALL putrow_zcsr(mat%row(i), arr, col, nlforce_zero)
     ELSE
        CALL putrow_zfull(mat%row(i), arr, nlforce_zero)
     END IF
   END SUBROUTINE putrow_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_spmat(mat, j, arr, nlforce_zero)
 !
 !   Put a column to mtarix
 !
     TYPE(spmat)                   :: mat
     INTEGER, INTENT(in)           :: j
     DOUBLE PRECISION, INTENT(in)  :: arr(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     INTEGER :: i
     DO i=1,mat%rank
        CALL putele(mat, i, j, arr(i), nlforce_zero)
     END DO
   END SUBROUTINE putcol_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_zspmat(mat, j, arr, nlforce_zero)
 !
 !   Put a column to mtarix
 !
     TYPE(zspmat)                  :: mat
     INTEGER, INTENT(in)           :: j
     DOUBLE COMPLEX, INTENT(in)    :: arr(:)
     LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
 !
     INTEGER :: i
     DO i=1,mat%rank
        CALL putele(mat, i, j, arr(i), nlforce_zero)
     END DO
   END SUBROUTINE putcol_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_spmat(mat, j, arr)
 !
 !   Get column j of matrix
 !
     TYPE(spmat)                   :: mat
     INTEGER, INTENT(in)           :: j
     DOUBLE PRECISION, INTENT(out) :: arr(:)
     INTEGER :: i
     DO i=1,mat%rank
        CALL getele(mat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_zspmat(mat, j, arr)
 !
 !   Get column j of matrix
 !
     TYPE(zspmat)                :: mat
     INTEGER, INTENT(in)         :: j
     DOUBLE COMPLEX, INTENT(out) :: arr(:)
     INTEGER :: i
     DO i=1,mat%rank
        CALL getele(mat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_row(arow)
 !
 !  Deallocate the sparse row
 !
     TYPE(sprow), INTENT(inout) :: arow
 !
     IF(ASSOCIATED(arow%row0)) CALL destroy_node(arow%row0)
     arow%nnz = get_count(arow)
   END SUBROUTINE destroy_row
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zrow(arow)
 !
 !  Deallocate the sparse row
 !
     TYPE(zsprow), INTENT(inout) :: arow
 !
     IF(ASSOCIATED(arow%row0)) CALL destroy_znode(arow%row0)
     arow%nnz = get_count(arow)
   END SUBROUTINE destroy_zrow
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   RECURSIVE SUBROUTINE destroy_node(p)
 !
 !  Deallocate recursively the linked list
 !
     TYPE(elt), POINTER :: p
 !
     IF(ASSOCIATED(p%next)) CALL destroy_node(p%next)
     DEALLOCATE(p)
   END SUBROUTINE destroy_node
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   RECURSIVE SUBROUTINE destroy_znode(p)
 !
 !  Deallocate recursively the linked list
 !
     TYPE(zelt), POINTER :: p
 !
     IF(ASSOCIATED(p%next)) CALL destroy_znode(p%next)
     DEALLOCATE(p)
   END SUBROUTINE destroy_znode
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_spmat(mat)
 !
 !  Deallocate the sparse matrix
 !
     TYPE(spmat) :: mat
     INTEGER :: n, i
 !
     n = mat%rank
     DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
        CALL destroy(mat%row(i))
     END DO
     DEALLOCATE(mat%row)
   END SUBROUTINE destroy_spmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zspmat(mat)
 !
 !  Deallocate the sparse matrix
 !
     TYPE(zspmat) :: mat
     INTEGER :: n, i
 !
     n = mat%rank
     DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
        CALL destroy(mat%row(i))
     END DO
     DEALLOCATE(mat%row)
   END SUBROUTINE destroy_zspmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION isearch(karr, k)
 !
 !   Sequential search an ordered table of integers
 !
     INTEGER, INTENT(in) :: karr(0:)
     INTEGER, INTENT(in) :: k
     INTEGER :: n
 !
     n=SIZE(karr)
     isearch = -1   ! Failure
     IF( k.GT.karr(n-1)) RETURN
 !
     isearch=0
     DO 
        IF( k.LE.karr(isearch)) EXIT
        isearch = isearch+1
     END DO
     IF( k.NE.karr(isearch)) isearch = -1  ! Failure
   END FUNCTION isearch
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION isearch_bin(karr, k)
 !
 !   Binary search an ordered table of integers
 !
     INTEGER, INTENT(in) :: karr(0:)
     INTEGER, INTENT(in) :: k
     INTEGER :: n
     INTEGER :: l, u
 !
     n=SIZE(karr)
     isearch_bin = -1   ! Failure
     IF( k.LT.karr(0) .OR. k.GT.karr(n-1)) RETURN
 !
     l=0; u=n-1
     DO WHILE(l.LE.u)
        isearch_bin = (l+u)/2
        IF(k.EQ.karr(isearch_bin)) THEN 
           RETURN
        ELSE IF(k.LT.karr(isearch_bin)) THEN
           u = isearch_bin-1
        ELSE
           l = isearch_bin+1
        END IF
     END DO
     isearch_bin = -1   ! Failure
   END FUNCTION isearch_bin
 !
 END MODULE sparse
diff --git a/src/tsparse3.f90 b/src/tsparse3.f90
index 0e75703..1f23792 100644
--- a/src/tsparse3.f90
+++ b/src/tsparse3.f90
@@ -1,705 +1,705 @@
 !>
 !> @file tsparse3.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and WSMP non-symmetric matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_mod
   USE bsplines
   USE wsmp_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(wsmp_mat), INTENT(inout)    :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 DO it1=0,nidbas1  ! Test function in dir 1
                    igt1 = i+it1
                    DO it2=0,nidbas2  ! Test function in dir 2
                       igt2 = MODULO(j+it2-1, n2) + 1
                       jcol = igt2 + (igt1-1)*n2
 !-------------
                       contrib = 0.0d0
                       DO ig1=1,ng1
                          DO ig2=1,ng2
                             DO iterm=1,kterms
                                contrib = contrib + &
                                     &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                     &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                     &    coefs(iterm,ig1,ig2) *                 &
                                     &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                     &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                     &    wg1(ig1) * wg2(ig2)
                             END DO
                          END DO
                       END DO
                       CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     CALL putrow(mat, ny, zsum)
 !
 !   The horizontal sum on the NY-th column
 !
     zsum = 0.0d0
     DO j=1,ny
        arr = 0.0d0
        CALL getcol(mat, j, arr)
        zsum(ny:) = zsum(ny:) + arr(ny:)
     END DO
     CALL putcol(mat, ny, zsum)
 !
 !   The away operator
 !
     DO j = 1,ny-1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO j = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(j) = 1.0d0
        CALL putcol(mat, j, arr)     
     END DO
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_mod
 PROGRAM main
   USE pde2d_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(wsmp_mat) :: mat
 !
   CHARACTER(len=128) :: file='pde2d_wsmp.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlmetis, nlforce_zero
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
        &            nlforce_zero, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   WRITE(*,'(a,i8)') 'nrank', nrank
 !
   CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
   CALL dismat(splxy, mat)
   ALLOCATE(arr(nrank))
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
   END IF
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
      WRITE(*,'(a)') 'Last rows'
      DO i=nrank-ny,nrank
         CALL getrow(mat, i, arr)
         WRITE(*,'(10(1pe12.3))') arr
      END DO
   END IF
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL to_mat(mat)
   tconv = seconds() -t0
   WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
   WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
 !
   t0 = seconds()
   CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
   CALL putmat(fid, '/MAT', mat)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat, debug=.FALSE.)
   tfact = seconds() - t0
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
 !
   CALL bsolve(mat, rhs, sol, debug=.FALSE.)
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
   tsolv = (seconds() - t0)/REAL(nits)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
   WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
   CALL numfact(mat, debug=.FALSE.)
   tfact = seconds()-t0
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
 !
   DEALLOCATE(newsol)
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/src/tsparse4.f90 b/src/tsparse4.f90
index 40b900f..b328bbe 100644
--- a/src/tsparse4.f90
+++ b/src/tsparse4.f90
@@ -1,722 +1,722 @@
 !>
 !> @file tsparse4.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !  Solving the 2d PDE using splines and PARDISO non-symmetric matrix
 !
 !    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
 !    exact solution: f(x,y) = (1-x^2) x^m cos(my)
 !
 MODULE pde2d_mod
   USE bsplines
   USE pardiso_bsplines
   IMPLICIT NONE
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE dismat(spl, mat)
 !
 !   Assembly of FE matrix mat using spline spl
 !
     TYPE(spline2d), INTENT(in)       :: spl
     TYPE(pardiso_mat), INTENT(inout) :: mat
 !
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2
     INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
     DOUBLE PRECISION:: contrib
 !
     INTEGER :: kterms         ! Number of terms in weak form
     INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
     DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
     INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
     WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
     WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !
 !   Weak form
 !
     kterms = mat%nterms
     ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
     ALLOCATE(coefs(kterms,ng1,ng2))
 !
     ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
     ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
 !===========================================================================
 !              2.0 Assembly loop
 !
     ALLOCATE(left1(ng1))
     ALLOCATE(left2(ng2))
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        left1 = i
        CALL basfun(xg1, spl%sp1, fun1, left1)
        DO j=1,n2
           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
           left2 = j
           CALL basfun(xg2, spl%sp2, fun2, left2)
 !
           DO ig1=1,ng1
              DO ig2=1,ng2
                 CALL coefeq(xg1(ig1), xg2(ig2), &
                      &      idert(:,:,ig1,ig2), &
                      &      iderw(:,:,ig1,ig2), &
                      &      coefs(:,ig1,ig2))
              END DO
           END DO
 !
           DO iw1=0,nidbas1  ! Weight function in dir 1
              igw1 = i+iw1
              DO iw2=0,nidbas2  ! Weight function in dir 2
                 igw2 = MODULO(j+iw2-1, n2) + 1
                 irow = igw2 + (igw1-1)*n2
                 DO it1=0,nidbas1  ! Test function in dir 1
                    igt1 = i+it1
                    DO it2=0,nidbas2  ! Test function in dir 2
                       igt2 = MODULO(j+it2-1, n2) + 1
                       jcol = igt2 + (igt1-1)*n2
 !-------------
                       contrib = 0.0d0
                       DO ig1=1,ng1
                          DO ig2=1,ng2
                             DO iterm=1,kterms
                                contrib = contrib + &
                                     &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                     &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                     &    coefs(iterm,ig1,ig2) *                 &
                                     &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                     &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                     &    wg1(ig1) * wg2(ig2)
                             END DO
                          END DO
                       END DO
                       CALL updtmat(mat, irow, jcol, contrib)
 !-------------
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
     DEALLOCATE(idert, iderw, coefs)
     DEALLOCATE(left1,left2)
 !
   CONTAINS
     SUBROUTINE coefeq(x, y, idt, idw, c)
       DOUBLE PRECISION, INTENT(in) :: x, y
       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
       DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
 !
 ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
 !
       c(1) = x        ! 
       idt(1,1) = 1
       idt(1,2) = 0
       idw(1,1) = 1
       idw(1,2) = 0
       !
       c(2) = 1.d0/x
       idt(2,1) = 0
       idt(2,2) = 1
       idw(2,1) = 0
       idw(2,2) = 1
     END SUBROUTINE coefeq
   END SUBROUTINE dismat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE disrhs(mbess, spl, rhs)
 !
 !   Assembly the RHS using 2d spline spl
 !
     INTEGER, INTENT(in) :: mbess
     TYPE(spline2d), INTENT(in) :: spl
     DOUBLE PRECISION, INTENT(out) :: rhs(:)
     INTEGER :: n1, nidbas1, ndim1, ng1
     INTEGER :: n2, nidbas2, ndim2, ng2
     INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
     DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
     DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
     DOUBLE PRECISION:: contrib
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
     CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
     CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
 !
     ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
     ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
 !
 !   Gauss quadature
 !
     CALL get_gauss(spl%sp1, ng1)
     CALL get_gauss(spl%sp2, ng2)
     ALLOCATE(xg1(ng1), wg1(ng1))
     ALLOCATE(xg2(ng1), wg2(ng1))
 !===========================================================================
 !              2.0 Assembly loop
 !
     nrank = SIZE(rhs)
     rhs(1:nrank) = 0.0d0
 !
     DO i=1,n1
        CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
        DO ig1=1,ng1
           CALL basfun(xg1(ig1), spl%sp1, fun1, i)
           DO j=1,n2
              CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
              DO ig2=1,ng2
                 CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                 contrib = wg1(ig1)*wg2(ig2) * &
                      &    rhseq(xg1(ig1),xg2(ig2), mbess)
                 DO k1=0,nidbas1
                    i1 = i+k1
                    DO k2=0,nidbas2
                       j2 = MODULO(j+k2-1,n2) + 1
                       ij = j2 + (i1-1)*n2
                       rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                    END DO
                 END DO
              END DO
           END DO
        END DO
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(xg1, wg1, fun1)
     DEALLOCATE(xg2, wg2, fun2)
 !
   CONTAINS
     DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
       DOUBLE PRECISION, INTENT(in) :: x1, x2
       INTEGER, INTENT(in) :: m
       rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
     END FUNCTION rhseq
   END SUBROUTINE disrhs
 !
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcmat(mat, ny)
 !
 !   Apply BC on matrix
 !
     TYPE(pardiso_mat), INTENT(inout) :: mat
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank, i, j
     DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = mat%rank
     ALLOCATE(zsum(nrank), arr(nrank))
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum on the NY-th row
 !
     zsum = 0.0d0
     DO i=1,ny
        arr = 0.0d0
        CALL getrow(mat, i, arr)
        zsum(:) = zsum(:) + arr(:)
     END DO
     zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
     CALL putrow(mat, ny, zsum)
 !
 !   The away operator
 !
     DO i = 1,ny-1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !  
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     DO i = nrank, nrank-ny+1, -1
        arr = 0.0d0; arr(i) = 1.0d0
        CALL putrow(mat, i, arr)     
     END DO
 !===========================================================================
 !              9.0  Epilogue
 !
     DEALLOCATE(zsum, arr)
 !
   END SUBROUTINE ibcmat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE ibcrhs(rhs, ny)
 !
 !   Apply BC on RHS
 !
     DOUBLE PRECISION, INTENT(inout) :: rhs(:)
     INTEGER, INTENT(in) :: ny
     INTEGER :: nrank
     DOUBLE PRECISION :: zsum
 !===========================================================================
 !              1.0 Prologue
 !
     nrank = SIZE(rhs,1)
 !===========================================================================
 !              2.0 Unicity at the axis
 !
 !   The vertical sum
 !
     zsum = SUM(rhs(1:ny))
     rhs(ny) = zsum
     rhs(1:ny-1) = 0.0d0
 !===========================================================================
 !              3.0 Dirichlet on right boundary
 !
     rhs(nrank-ny+1:nrank) = 0.0d0
   END SUBROUTINE ibcrhs
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE pde2d_mod
 PROGRAM main
   USE pde2d_mod
   USE futils
 !
   IMPLICIT NONE
   INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
   LOGICAL :: nlppform
   INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
   DOUBLE PRECISION :: pi, coefx(5), coefy(5)
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
   TYPE(spline2d) :: splxy
   TYPE(pardiso_mat) :: mat, newmat
 !
   CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5'
   INTEGER :: fid
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
   DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
   DOUBLE PRECISION :: seconds, mem, dopla
   DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
   DOUBLE PRECISION :: tconv, treord
   INTEGER :: nits=100
   LOGICAL :: nlmetis, nlforce_zero, nlpos
 !
   NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
        &            nlpos, nlforce_zero, coefx, coefy
 !===========================================================================
 !              1.0 Prologue
 !
 !   Read in data specific to run
 !
   nx = 8              ! Number of intervals in x
   ny = 8              ! Number of intervals in y
   nidbas = (/3,3/)    ! Degree of splines
   ngauss = (/4,4/)    ! Number of Gauss points/interval
   mbess = 2           ! Exponent of differential problem
   nterms = 2          ! Number of terms in weak form
   nlppform = .TRUE.   ! Use PPFORM for gridval or not
   nlmetis = .FALSE.   ! Use metis ordering or minimum degree
   nlforce_zero = .TRUE. ! Remove existing nodes with zero val in  putele/row/ele
   nlpos = .TRUE.        ! Matrix is positive definite
   coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
   coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
 !
   READ(*,newrun)
   WRITE(*,newrun)
 !
 !   Define grid on x (=radial) & y (=poloidal) axis
 !
   pi = 4.0d0*ATAN(1.0d0)
   ALLOCATE(xgrid(0:nx), ygrid(0:ny))
   xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
   CALL meshdist(coefx, xgrid, nx)
   ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
   CALL meshdist(coefy, ygrid, ny)
 !
 !   Create hdf5 file
 !
   CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
   CALL attach(fid, '/', 'NX', nx)
   CALL attach(fid, '/', 'NY', ny)
   CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
   CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
   CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
   CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
   CALL attach(fid, '/', 'MBESS', mbess)
 !===========================================================================
 !              2.0 Discretize the PDE
 !
 !   Set up spline
 !
   t0 = seconds()
   CALL set_spline(nidbas, ngauss, &
        &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
 !
 !   FE matrix assembly
 !
   nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
   WRITE(*,'(a,i8)') 'nrank', nrank
 !
   CALL init(nrank, nterms, mat, nlsym=.FALSE., nlpos=nlpos, &
        &    nlforce_zero=nlforce_zero)
   CALL dismat(splxy, mat)
   ALLOCATE(arr(nrank))
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
   END IF
 !
 !   BC on Matrix
 !
   WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
   WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   CALL ibcmat(mat, ny)
   IF(nrank.LT.100) THEN
      DO i=1,nrank
         CALL getele(mat, i, i, arr(i))
      END DO
      WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
      WRITE(*,'(a)') 'Last rows'
      DO i=nrank-ny,nrank
         CALL getrow(mat, i, arr)
         WRITE(*,'(10(1pe12.3))') arr
      END DO
   END IF
   tmat = seconds() - t0
 !
 !   RHS assembly
 !
   ALLOCATE(rhs(nrank), sol(nrank))
   CALL disrhs(mbess, splxy, rhs)
 !
 !   BC on RHS
 !
   CALL ibcrhs(rhs, ny)
 !
   CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
   WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
 !===========================================================================
 !              3.0 Solve the dicretized system
 !
   t0 = seconds()
   CALL to_mat(mat)
   tconv = seconds() -t0
   WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
   WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
 !
   t0 = seconds()
   CALL reord_mat(mat)
   CALL putmat(fid, '/MAT', mat)
   treord = seconds() - t0
 !
   t0 = seconds()
   CALL numfact(mat)
   tfact = seconds() - t0
  
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
   WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
   WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol))
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL bsolve(mat, rhs, sol)
      sol(1:ny-1) = sol(ny)
   END DO
   WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
   tsolv = (seconds() - t0)/REAL(nits)
 !
 !   Spline coefficients, taking into account of periodicity in y
 !   Note: in SOL, y was numbered first.
 !
   dimx = splxy%sp1%dim
   dimy = splxy%sp2%dim
   ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
   DO j=0,dimy-1
      DO i=0,dimx-1
         ij = MODULO(j,ny) + i*ny + 1
         bcoef(i,j) = sol(ij)
      END DO
   END DO
   WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
   CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
 !===========================================================================
 !              4.0 Check the solution
 !
 !   Check function values computed with various method
 !
   ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
      END DO
   END DO
   jder = (/0,0/)
 !
 !   Compute PPFORM/BCOEFS at first call to gridval
   CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
 !
   WRITE(*,'(/a)') '*** Checking solutions'
   t0 = seconds()
   DO it=1,nits   ! nits iterations for timing
      CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   END DO
   tgrid = (seconds() - t0)/REAL(nits)
   errsol = solana - solcal
   IF( SIZE(bcoef,2) .LE. 10 ) THEN
      CALL prnmat('BCOEF', bcoef)
      CALL prnmat('SOLANA', solana)
      CALL prnmat('SOLCAL', solcal)
      CALL prnmat('ERRSOL', errsol)
   END IF
   WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
        &    norm2(errsol) / norm2(solana)
   WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
 !
   CALL putarr(fid, '/xgrid', xgrid, 'r')
   CALL putarr(fid, '/ygrid', ygrid, '\theta')
   CALL putarr(fid, '/sol', solcal, 'Solutions')
   CALL putarr(fid, '/solana', solana,'Exact solutions')
   CALL putarr(fid, '/errors', errsol, 'Errors')
 !
 !   Check derivatives d/dx and d/dy
 !
   WRITE(*,'(/a)') '*** Checking gradient'
   DO i=0,nx
      DO j=0,ny
         IF( mbess .EQ. 0 ) THEN
            solana(i,j) = -2.0d0 * xgrid(i)
         ELSE
            solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
              &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
         END IF
      END DO
   END DO
 !
   jder = (/1,0/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   errsol = solana - solcal
   CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
 !
   DO i=0,nx
      DO j=0,ny
         solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
      END DO
   END DO
 !
   jder = (/0,1/)
   CALL gridval(splxy, xgrid, ygrid, solcal, jder)
   CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
   errsol = solana - solcal
   WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
 !
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
   WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
   WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !===========================================================================
 !              5.0 Clear the matrix and recompute
 !
   WRITE(*,'(/a)') 'Recompute the solver ...'
   t0 = seconds()
   CALL clear_mat(mat)
   CALL dismat(splxy, mat)
   CALL ibcmat(mat, ny)
   tmat = seconds()-t0
 !
   t0 = seconds()
   CALL numfact(mat)
   tfact = seconds()-t0
   gflops1 = mat%p%iparm(19) / tfact / 1.d3
 !
   t0 = seconds()
   ALLOCATE(newsol(nrank))
   CALL bsolve(mat, rhs, newsol)
   newsol(1:ny-1) = newsol(ny)
   tsolv = seconds()-t0
 !
   WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
   WRITE(*,'(/a)') '---'
   WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
   WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
   WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
   WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
   WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
 !
 !===========================================================================
 !              6.0 Another matrix to solve
 !
   WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol))
 !!$  PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid
 !
   WRITE(*,'(/a)') ' Another solver ...'
 !
   CALL init(nrank, nterms, newmat, nlsym=.FALSE., nlpos=nlpos, &
        &    nlforce_zero=nlforce_zero)
   CALL mcopy(mat, newmat)
 !!$  CALL clear_mat(newmat)
 !!$  CALL maddto(newmat, 1000.0d0, mat)
   CALL factor(newmat)
 !!$  CALL dismat(splxy, newmat)
 !!$  CALL ibcmat(newmat, ny)
 !!$  CALL to_mat(newmat)
 !!$  CALL reord_mat(newmat)
 !!$  CALL numfact(newmat)
   CALL bsolve(newmat, rhs, newsol)
   WRITE(*,'(/a, 1pe16.8)') 'Norm of newsol =', SQRT(DOT_PRODUCT(newsol,newsol))
 !!$  PRINT*, 'current/last matid, matid', current_matid, last_matid, newmat%matid
 !
   CALL bsolve(mat, rhs, sol)
   WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol))
 !!$  PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid
 !
 !===========================================================================
 
 !              9.0  Epilogue
 !
   DEALLOCATE(xgrid, rhs, sol)
   DEALLOCATE(solcal, solana, errsol)
   DEALLOCATE(bcoef)
   DEALLOCATE(arr)
   DEALLOCATE(newsol)
   CALL destroy_sp(splxy)
   CALL destroy(mat)
   CALL destroy(newmat)
 !
   CALL closef(fid)
 !===========================================================================
 !
 CONTAINS
   FUNCTION norm2(x)
 !
 !  Compute the 2-norm of array x
 !
     IMPLICIT NONE
     DOUBLE PRECISION :: norm2
     DOUBLE PRECISION, INTENT(in) :: x(:,:)
     DOUBLE PRECISION :: sum2
     INTEGER :: i, j
 !
     sum2 = 0.0d0
     DO i=1,SIZE(x,1)
        DO j=1,SIZE(x,2)
           sum2 = sum2 + x(i,j)**2
        END DO
     END DO
     norm2 = SQRT(sum2)
   END FUNCTION norm2
   SUBROUTINE prnmat(label, mat)
     CHARACTER(len=*) :: label
     DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
     INTEGER :: i
     WRITE(*,'(/a)') TRIM(label)
     DO i=1,SIZE(mat,1)
        WRITE(*,'(10(1pe12.3))') mat(i,:)
     END DO
   END SUBROUTINE prnmat
 END PROGRAM main
 !
 !+++
 SUBROUTINE meshdist(c, x, nx)
 !
 !   Construct an 1d  non-equidistant mesh given a
 !   mesh distribution function.
 !
   IMPLICIT NONE
   DOUBLE PRECISION, INTENT(in) :: c(5)
   INTEGER, INTENT(iN) :: nx
   DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
   INTEGER :: nintg
   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
   DOUBLE PRECISION :: a, b, dx, f0, f1, scal
   INTEGER :: i, k
 !
   a=x(0)
   b=x(nx)
   nintg = 10*nx
   ALLOCATE(xint(0:nintg), fint(0:nintg))
 !
 !  Mesh distribution
 !
   dx = (b-a)/REAL(nintg)
   xint(0) = a
   fint(0) = 0.0d0
   f1 = fdist(xint(0))
   DO i=1,nintg
      f0 = f1
      xint(i) = xint(i-1) + dx
      f1 = fdist(xint(i))
      fint(i) = fint(i-1) + 0.5*(f0+f1)
   END DO
 !
 !  Normalization
 !
   scal = REAL(nx) / fint(nintg)
   fint(0:nintg) = fint(0:nintg) * scal
 !!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
 !
 !  Obtain mesh point by (inverse) interpolation
 !
   k = 1
   DO i=1,nintg-1
      IF( fint(i) .GE. REAL(k) ) THEN
         x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
              &   (k-fint(i))
         k = k+1
      END IF
   END DO
 !
   DEALLOCATE(xint, fint)
 CONTAINS
   DOUBLE PRECISION FUNCTION fdist(x)
     DOUBLE PRECISION, INTENT(in) :: x
     fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
   END FUNCTION fdist
 END SUBROUTINE meshdist
 !+++
diff --git a/src/wsmp_mod.f90 b/src/wsmp_mod.f90
index 8026640..ff71b7f 100644
--- a/src/wsmp_mod.f90
+++ b/src/wsmp_mod.f90
@@ -1,1835 +1,1835 @@
 !>
 !> @file wsmp_mod.f90
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 MODULE wsmp_bsplines
 !
 !    WSMP_BSPLINES: Simple interface to the sparse direct solver WSMP.
 !
 !    T.M. Tran, CRPP-EPFL
 !    November 2011
 !
   USE sparse
   IMPLICIT NONE
 !
   INTEGER, SAVE :: current_matid = -1
   INTEGER, SAVE :: last_matid = -1
 !
   TYPE wsmp_param
      INTEGER          :: iparm(64)
      DOUBLE PRECISION :: dparm(64)
   END TYPE wsmp_param
 !
   TYPE wsmp_mat
      INTEGER :: matid=-1
      INTEGER :: rank=0, nnz
      INTEGER :: nterms, kmat, nrhs
      LOGICAL :: nlsym
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(spmat), POINTER      :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      INTEGER, POINTER          :: perm(:) => NULL()
      INTEGER, POINTER          :: invp(:) => NULL()
      INTEGER, POINTER          :: mrp(:) => NULL()
      DOUBLE PRECISION, POINTER :: diag(:) => NULL()
      DOUBLE PRECISION, POINTER :: val(:) => NULL()
      DOUBLE PRECISION, POINTER :: aux(:) => NULL()
      TYPE(wsmp_param)       :: p
   END TYPE wsmp_mat
 !
   TYPE zwsmp_mat
      INTEGER :: matid=-1
      INTEGER :: rank=0, nnz
      INTEGER :: nterms, kmat, nrhs
      LOGICAL :: nlherm
      LOGICAL :: nlsym
      LOGICAL :: nlpos
      LOGICAL :: nlforce_zero
      TYPE(zspmat), POINTER     :: mat => NULL()
      INTEGER, POINTER          :: cols(:) => NULL()
      INTEGER, POINTER          :: irow(:) => NULL()
      INTEGER, POINTER          :: perm(:) => NULL()
      INTEGER, POINTER          :: invp(:) => NULL()
      INTEGER, POINTER          :: mrp(:) => NULL()
      DOUBLE COMPLEX, POINTER :: diag(:) => NULL()
      DOUBLE COMPLEX, POINTER :: val(:) => NULL()
      DOUBLE COMPLEX, POINTER :: aux(:) => NULL()
      TYPE(wsmp_param)       :: p
   END TYPE zwsmp_mat
 !
   INTERFACE init
      MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat
   END INTERFACE init
 !
   INTERFACE check_mat
      MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat
   END INTERFACE check_mat
 !
   INTERFACE clear_mat
      MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat
   END INTERFACE clear_mat
 !
   INTERFACE updtmat
      MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat
   END INTERFACE updtmat
 !
   INTERFACE putele
      MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat
   END INTERFACE putele
 !
   INTERFACE getele
      MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat
   END INTERFACE getele
 !
   INTERFACE putrow
      MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat
   END INTERFACE putrow
 !
   INTERFACE getrow
      MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat
   END INTERFACE getrow
 !
   INTERFACE putcol
      MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat
   END INTERFACE putcol
 !
   INTERFACE getcol
      MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat
   END INTERFACE getcol
 !
   INTERFACE get_count
      MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat
   END INTERFACE get_count
 !
   INTERFACE to_mat
      MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat
   END INTERFACE to_mat
 !
   INTERFACE reord_mat
      MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat
   END INTERFACE reord_mat
 !
   INTERFACE numfact
      MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat
   END INTERFACE numfact
 !
   INTERFACE factor
      MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat
   END INTERFACE factor
 !
   INTERFACE bsolve
      MODULE PROCEDURE bsolve_wsmp_mat1,  bsolve_wsmp_matn, &
           &           bsolve_zwsmp_mat1, bsolve_zwsmp_matn
   END INTERFACE bsolve
 !
   INTERFACE vmx
      MODULE PROCEDURE vmx_wsmp_mat,  vmx_wsmp_matn, &
           &           vmx_zwsmp_mat, vmx_zwsmp_matn
   END INTERFACE vmx
 !
   INTERFACE destroy
      MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat
  END INTERFACE destroy
 !
  INTERFACE putmat
     MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat
  END INTERFACE putmat
 !
  INTERFACE getmat
     MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat
  END INTERFACE getmat
 !
  INTERFACE mcopy
     MODULE PROCEDURE mcopy_wsmp_mat,  mcopy_zwsmp_mat
  END INTERFACE mcopy
 !
  INTERFACE maddto
     MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat
  END INTERFACE maddto
 !
  INTERFACE psum_mat
     MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat
  END INTERFACE psum_mat
 !
  INTERFACE p2p_mat
     MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat
  END INTERFACE p2p_mat
 !
 CONTAINS
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, &
        &                   nlforce_zero)
 !
 !   Initialize an empty sparse wsmp matrix
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(wsmp_mat)                :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER :: info
     INTEGER :: idummy = 0
     DOUBLE PRECISION :: dummy = 0.0d0
 !
 !  Store away (valid) current matrix id
 !
     IF(current_matid .GE. 0) THEN
        CALL wstoremat(current_matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
     last_matid = last_matid+1
     mat%matid = last_matid
     current_matid = mat%matid
 !
 !  Initialize sparse matrice structure
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlpos = .TRUE.
     mat%nrhs = 1
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat)
 !
 !  Fill 'iparm' and 'dparm' with default values
 !
     mat%p%iparm(1:3) = 0
     IF(mat%nlsym) THEN
        CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
        IF(mat%nlpos) THEN
           mat%p%iparm(31) = 0
        ELSE
 !!$          mat%p%iparm(31) = 1  ! LDL^T without pivoting
           mat%p%iparm(31) = 2  ! LDL^T with pivoting
        END IF
     ELSE
        CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
    END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', &
             &             mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !!$    WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm
 !!$    WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm
 !
     CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
 !
   CONTAINS
     SUBROUTINE setup_wsmp(iparm, dparm)
       INTEGER          :: iparm(:)
       DOUBLE PRECISION :: dparm(:)
     END SUBROUTINE setup_wsmp
   END SUBROUTINE init_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, &
        &                   nlpos, nlforce_zero)
 !
 !   Initialize an empty sparse wsmp matrix
 !
     INTEGER, INTENT(in)           :: n, nterms
     TYPE(zwsmp_mat)               :: mat
     INTEGER, OPTIONAL, INTENT(in) :: kmat
     LOGICAL, OPTIONAL, INTENT(in) :: nlsym
     LOGICAL, OPTIONAL, INTENT(in) :: nlherm
     LOGICAL, OPTIONAL, INTENT(in) :: nlpos
     LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
     INTEGER :: info
     INTEGER :: idummy = 0
     DOUBLE COMPLEX :: dummy = 0.0d0
 !
 !  Store away (valid) current matrix id
 !
     IF(current_matid .GE. 0) THEN
        CALL wstoremat(current_matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
     last_matid = last_matid+1
     mat%matid = last_matid
     current_matid = mat%matid
 !
 !  Initialize sparse matrice structure
 !
     mat%rank = n
     mat%nterms = nterms
     mat%nnz = 0
     mat%nlsym = .FALSE.
     mat%nlherm = .FALSE.
     mat%nlpos = .TRUE.
     mat%nrhs = 1
     mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
     IF(PRESENT(kmat)) mat%kmat = kmat
     IF(PRESENT(nlsym)) mat%nlsym = nlsym
     IF(PRESENT(nlherm)) mat%nlherm = nlherm
     IF(PRESENT(nlpos)) mat%nlpos = nlpos
     IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
     IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
     ALLOCATE(mat%mat)
     CALL init(n, mat%mat)
 !
 !  Fill 'iparm' and 'dparm' with default values
 !
     mat%p%iparm(1:3) = 0
     IF(mat%nlherm .OR. mat%nlsym) THEN
        CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
        IF(mat%nlherm) THEN
           IF(mat%nlpos) THEN
              mat%p%iparm(31) = 0  ! hermitian, positive definite 
           ELSE
              mat%p%iparm(31) = 2  ! hermitian, no-definite, LDL^T with pivoting
           END IF
        ELSE
           mat%p%iparm(31) = 3     ! non-hermitian, symmetric
        END IF
     ELSE
        CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
    END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', &
             &             mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !!$    WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm
 !!$    WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm
 !
     CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
 !
   CONTAINS
     SUBROUTINE setup_wsmp(iparm, dparm)
       INTEGER          :: iparm(:)
       DOUBLE PRECISION :: dparm(:)
     END SUBROUTINE setup_wsmp
   END SUBROUTINE init_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE check_wsmp_mat(mat)
 !
 !  Check matrice id and  recall the matrice if not current
 !
     TYPE(wsmp_mat) :: mat
     INTEGER :: info
 !
     IF(.NOT.mat%nlsym) THEN
        IF( mat%matid.NE.current_matid ) THEN      
           WRITE(*,'(a)') "Processing multi matrices is not possible "// &
                &         "for non-symetric matrices."
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        ELSE
           RETURN
        END IF
     END IF
 !
     IF( mat%matid.NE.current_matid ) THEN
        IF(current_matid .GE. 0) THEN
           CALL wstoremat(current_matid, info)
           IF(info.NE.0) THEN
              WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, &
                   &               ' failed with error', info
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
        CALL wrecallmat(mat%matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, &
                &              ' failed with error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
        current_matid = mat%matid
     END IF
   END SUBROUTINE check_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE check_zwsmp_mat(mat)
 !
 !  Check matrice id and  recall the matrice if not current
 !
     TYPE(zwsmp_mat) :: mat
     INTEGER :: info
 !
     IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN
        IF( mat%matid.NE.current_matid ) THEN      
           WRITE(*,'(a)') "Processing multi matrices is not possible "// &
                &         "for non-symetric/non-hermitian matrices."
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        ELSE
           RETURN
        END IF
     END IF
 !
     IF( mat%matid.NE.current_matid ) THEN
        IF(current_matid .GE. 0) THEN
           CALL wstoremat(current_matid, info)
           IF(info.NE.0) THEN
              WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, &
                   &               ' failed with error', info
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
        CALL wrecallmat(mat%matid, info)
        IF(info.NE.0) THEN
           WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, &
                &              ' failed with error', info
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
        current_matid = mat%matid
     END IF
   END SUBROUTINE check_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_wsmp_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(wsmp_mat) :: mat
 !
     mat%val = 0.0d0
   END SUBROUTINE clear_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE clear_zwsmp_mat(mat)
 !
 !   Clear matrix, keeping its sparse structure unchanged
 !
     TYPE(zwsmp_mat) :: mat
 !
     mat%val = (0.0d0, 0.0d0)
   END SUBROUTINE clear_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_wsmp_mat(mat, i, j, val)
 !
 !   Update element Aij of wsmp  matrix
 !
     TYPE(wsmp_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
        IF(i.GT.j) RETURN
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  mat%val(s+k)+val
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE updt_zwsmp_mat(mat, i, j, val)
 !
 !   Update element Aij of wsmp  matrix
 !
     TYPE(zwsmp_mat)            :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     INTEGER :: k, s, e
 !
     IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
        IF(i.GT.j) RETURN
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL updtmat(mat%mat, i, j, val)
     ELSE
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        k = isearch(mat%cols(s:e), j)
        IF( k.GE.0 ) THEN
           IF(mat%nlherm) THEN
              mat%val(s+k) =  mat%val(s+k)+CONJG(val)  ! CSR-UT* = CSC-LT
           ELSE
              mat%val(s+k) =  mat%val(s+k)+val
        END IF
        ELSE
           WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
           STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
        END IF
     END IF
   END SUBROUTINE updt_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_wsmp_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(wsmp_mat)            :: mat
     INTEGER, INTENT(in)          :: i, j
     DOUBLE PRECISION, INTENT(in) :: val
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, val, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1)-1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           mat%val(s+k) =  val
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
    END IF
   END SUBROUTINE putele_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putele_zwsmp_mat(mat, i, j, val)
 !
 !  Put element (i,j) of matrix
 !
     TYPE(zwsmp_mat)            :: mat
     INTEGER, INTENT(in)        :: i, j
     DOUBLE COMPLEX, INTENT(in) :: val
     DOUBLE COMPLEX :: valput
     INTEGER :: iput, jput
     INTEGER :: k, s, e
 !
     iput = i
     jput = j
     valput = val
     IF(mat%nlsym .OR. mat%nlherm) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iput = j
           jput = i
           IF(mat%nlherm) THEN
              valput = CONJG(val)
           ELSE
              valput = val
           END IF
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL putele(mat%mat, iput, jput, valput, &
             &      nlforce_zero=mat%nlforce_zero)
     ELSE
        s = mat%irow(iput)
        e = mat%irow(iput+1)-1
        k = isearch(mat%cols(s:e), jput)
        IF( k.GE.0 ) THEN
           IF(mat%nlherm) THEN
              mat%val(s+k) = CONJG(valput)   ! CSR-UT* = CSC-LT
           ELSE
              mat%val(s+k) =  valput
           END IF
        ELSE
           IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
              WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
              STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
           END IF
        END IF
    END IF
  END SUBROUTINE putele_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_wsmp_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(wsmp_mat)             :: mat
     INTEGER, INTENT(in)           :: i, j
     DOUBLE PRECISION, INTENT(out) :: val
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, val)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1)-1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           val =mat%val(s+k) 
        ELSE
           val = 0.0d0   ! Assume zero val if not found
        END IF
     END IF
   END SUBROUTINE getele_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getele_zwsmp_mat(mat, i, j, val)
 !
 !   Get element (i,j) of sparse matrix
 !
     TYPE(zwsmp_mat)             :: mat
     INTEGER, INTENT(in)         :: i, j
     DOUBLE COMPLEX, INTENT(out) :: val
     DOUBLE COMPLEX :: valget
     INTEGER :: iget, jget
     INTEGER :: k, s, e
 !
     iget = i
     jget = j
     IF(mat%nlherm .OR. mat%nlsym) THEN
        IF( i.GT.j ) THEN    ! Lower triangular part
           iget = j
           jget = i
        END IF
     END IF
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL getele(mat%mat, iget, jget, valget)
     ELSE
        s = mat%irow(iget)
        e = mat%irow(iget+1)-1
        k = isearch(mat%cols(s:e), jget)
        IF( k.GE.0 ) THEN
           IF(mat%nlherm) THEN
              valget = CONJG(mat%val(s+k))   ! CSR-UT* = CSC-LT
           ELSE
              valget = mat%val(s+k) 
           END IF
        ELSE
           valget = (0.0d0,0.0d0)   ! Assume zero val if not found
        END IF
     END IF
     val = valget
     IF( i.GT.j ) THEN
        IF(mat%nlherm) THEN 
           val = CONJG(valget)
        END IF
     END IF
   END SUBROUTINE getele_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_wsmp_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: i
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putrow_zwsmp_mat(amat, i, arr)
 !
 ! Put a row into sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE COMPLEX, INTENT(in)     :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL putele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE putrow_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_wsmp_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(wsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: i
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getrow_zwsmp_mat(amat, i, arr)
 !
 !   Get a row from sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)          :: i
     DOUBLE COMPLEX, INTENT(out)  :: arr(:)
     INTEGER :: j
 !
     DO j=1,amat%rank
        CALL getele(amat, i, j, arr(j))
     END DO
   END SUBROUTINE getrow_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_wsmp_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(wsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)              :: j
     DOUBLE PRECISION, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE putcol_zwsmp_mat(amat, j, arr)
 !
 ! Put a column into sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(inout) :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE COMPLEX, INTENT(in)     :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL putele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE putcol_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_wsmp_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(wsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)            :: j
     DOUBLE PRECISION, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE getcol_zwsmp_mat(amat, j, arr)
 !
 !   Get a column from sparse matrix
 !
     TYPE(zwsmp_mat), INTENT(in)  :: amat
     INTEGER, INTENT(in)          :: j
     DOUBLE COMPLEX, INTENT(out)  :: arr(:)
     INTEGER :: i
 !
     DO i=1,amat%rank
        CALL getele(amat, i, j, arr(i))
     END DO
   END SUBROUTINE getcol_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_wsmp_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(wsmp_mat)                 :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_wsmp_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_wsmp_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=1,mat%rank
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz)
 !
 !   Number of non-zeros in sparse matrix
 !
     TYPE(zwsmp_mat)                :: mat
     INTEGER, INTENT(out), OPTIONAL :: nnz(:)
     INTEGER :: i
 !
     IF(ASSOCIATED(mat%mat)) THEN
        get_count_zwsmp_mat = get_count(mat%mat, nnz)
     ELSE
        get_count_zwsmp_mat = mat%nnz
        IF(PRESENT(nnz)) THEN
           DO i=1,mat%rank
              nnz(i) = mat%irow(i+1)-mat%irow(i)
           END DO
        END IF
     END IF
   END FUNCTION get_count_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_wsmp_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to wsmp matrice structure
 !
     TYPE(wsmp_mat) :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
 !    Allocate the WSMP matrix structure
 !
     nnz = get_count(mat)
     rank = mat%rank
     mat%nnz = nnz
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(nnz))
     IF(mat%nlsym) THEN
        ALLOCATE(mat%perm(rank))
        ALLOCATE(mat%invp(rank))
     END IF
     ALLOCATE(mat%irow(rank+1))
     ALLOCATE(mat%cols(nnz))
 !
 !    Fill WSMP structure and deallocate the sparse rows
 !
     mat%irow = 1
     DO i=1,rank
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE to_zwsmp_mat(mat, nlkeep)
 !
 !   Convert linked list spmat to wsmp matrice structure
 !
     TYPE(zwsmp_mat) :: mat
     LOGICAL, INTENT(in), OPTIONAL :: nlkeep
     INTEGER :: i, nnz, rank, s, e
     LOGICAL :: nlclean
 !
     nlclean = .TRUE.
     IF(PRESENT(nlkeep)) THEN
        nlclean = .NOT. nlkeep
     END IF
 !
 !    Allocate the WSMP matrix structure
 !
     nnz = get_count(mat)
     rank = mat%rank
     mat%nnz = nnz
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     ALLOCATE(mat%val(nnz))
     IF(mat%nlsym) THEN
        ALLOCATE(mat%perm(rank))
        ALLOCATE(mat%invp(rank))
     END IF
     ALLOCATE(mat%irow(rank+1))
     ALLOCATE(mat%cols(nnz))
 !
 !    Fill WSMP structure and deallocate the sparse rows
 !
     mat%irow = 1
     DO i=1,rank
        mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
        s = mat%irow(i)
        e = mat%irow(i+1)-1
        CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
        IF(nlclean) CALL destroy(mat%mat%row(i))
     END DO
     IF(mat%nlherm) THEN
        mat%val(:) = CONJG(mat%val(:))  ! CSR-UT* = CSC-LT
     END IF
     IF(nlclean) DEALLOCATE(mat%mat)
   END SUBROUTINE to_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_wsmp_mat(mat)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(wsmp_mat) :: mat
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 1  ! Ordering
        mat%p%iparm(3) = 2  ! Symbolic factorization
        CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        mat%p%iparm(2) = 1  ! Analysis and reordering
        mat%p%iparm(3) = 1
        CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE reord_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE reord_zwsmp_mat(mat)
 !
 !   Reordering and symbolic factorization
 !
     TYPE(zwsmp_mat)             :: mat
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        mat%p%iparm(2) = 1  ! Ordering
        mat%p%iparm(3) = 2  ! Symbolic factorization
        CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
 !!$       WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm
     ELSE
        mat%p%iparm(2) = 1  ! Analysis and reordering
        mat%p%iparm(3) = 1
        CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE reord_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_wsmp_mat(mat)
 !
 !   Numerical factorization
 !
     TYPE(wsmp_mat) :: mat
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 3   ! Numerical factorization
        mat%p%iparm(3) = 3
        CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        mat%p%iparm(2) = 2  ! Factorization
        mat%p%iparm(3) = 2
        CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE numfact_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE numfact_zwsmp_mat(mat)
 !
 !   Numerical factorization
 !
     TYPE(zwsmp_mat) :: mat
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        mat%p%iparm(2) = 3   ! Numerical factorization
        mat%p%iparm(3) = 3
        CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
             &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
             &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
             &      mat%p%dparm)
     ELSE
        mat%p%iparm(2) = 2  ! Factorization
        mat%p%iparm(3) = 2
        CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
             &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
   END SUBROUTINE numfact_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_wsmp_mat(mat, nlreord)
 !
 !  Factor (create  +reorder + factor) a wsmp_mat matrix
 !
     TYPE(wsmp_mat)                :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat)
   END SUBROUTINE factor_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE factor_zwsmp_mat(mat, nlreord)
 !
 !  Factor (create  +reorder + factor) a wsmp_mat matrix
 !
     TYPE(zwsmp_mat)               :: mat
     LOGICAL, OPTIONAL, INTENT(in) :: nlreord
     LOGICAL :: mlreord
 !----------------------------------------------------------------------
 !               1.0  Creation from the sparse rows
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL to_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               2.0  Reordering and symbolic factorization phase
 !
     mlreord = .TRUE.
     IF(PRESENT(nlreord)) mlreord = nlreord
     IF(mlreord) THEN
        CALL reord_mat(mat)
     END IF
 !----------------------------------------------------------------------
 !               3.0  Numerical factorization
 !
     CALL numfact(mat)
   END SUBROUTINE factor_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp
 !
     TYPE(wsmp_mat)             :: mat
     DOUBLE PRECISION           :: rhs(:)
     DOUBLE PRECISION, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL          :: nref
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = 1
     IF(PRESENT(sol)) THEN
        sol = rhs
        IF(mat%nlsym) THEN
           CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     ELSE
        IF(mat%nlsym) THEN
           CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
    END SUBROUTINE bsolve_wsmp_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp
 !
     TYPE(zwsmp_mat)          :: mat
     DOUBLE COMPLEX           :: rhs(:)
     DOUBLE COMPLEX, OPTIONAL :: sol(:)
     INTEGER, OPTIONAL        :: nref
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .OR. mat%nlherm) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym .OR. mat%nlherm) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = 1
     IF(PRESENT(sol)) THEN
        sol = rhs
        IF(mat%nlsym .OR. mat%nlherm) THEN
           CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     ELSE
        IF(mat%nlsym .OR. mat%nlherm) THEN
           CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
   END SUBROUTINE bsolve_zwsmp_mat1
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp, multiple RHS
 !
     TYPE(wsmp_mat)             :: mat
     DOUBLE PRECISION           :: rhs(:,:)
     DOUBLE PRECISION, OPTIONAL :: sol(:,:)
     INTEGER, OPTIONAL          :: nref
     DOUBLE PRECISION :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = SIZE(rhs,2)
     IF(PRESENT(sol)) THEN
        sol = rhs
        IF(mat%nlsym) THEN
           CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     ELSE
        IF(mat%nlsym) THEN
           CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
   END SUBROUTINE bsolve_wsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref)
 !
 !   Backsolve, using Wsmp, multiple RHS
 !
     TYPE(zwsmp_mat)          :: mat
     DOUBLE COMPLEX           :: rhs(:,:)
     DOUBLE COMPLEX, OPTIONAL :: sol(:,:)
     INTEGER, OPTIONAL        :: nref
     DOUBLE COMPLEX :: dummy
 !
 !    Recall the matrice if not current
 !
     CALL check_mat(mat)
 !
     IF(mat%nlsym .or. mat%nlherm) THEN
        mat%p%iparm(2) = 4  ! Back substitution
        mat%p%iparm(3) = 4
     ELSE
        mat%p%iparm(2) = 3  ! Back substitution
        mat%p%iparm(3) = 3
     END IF
     mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
     IF(PRESENT(nref)) THEN
        IF(mat%nlsym .OR. mat%nlherm) THEN
           mat%p%iparm(3) = 5
        ELSE
           mat%p%iparm(3) = 4
        END IF
        mat%p%iparm(6) = nref
     END IF
     mat%nrhs = SIZE(rhs,2)
     IF(PRESENT(sol)) THEN
        sol = rhs
        IF(mat%nlsym .OR. mat%nlherm) THEN
           CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     ELSE
        IF(mat%nlsym .OR. mat%nlherm) THEN
           CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
                &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
                &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
                &      mat%p%dparm)
        ELSE
           CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
                &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
        END IF
     END IF
     IF(mat%p%iparm(64).NE.0) THEN
        WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
   END SUBROUTINE bsolve_zwsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(wsmp_mat)                :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr))
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     CHARACTER(len=6) :: matdescra
     INTEGER :: n, i, j
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zwsmp_mat)             :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr))
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, i, j
     CHARACTER(len=6) :: matdescra
     CHARACTER(len=1) :: transa
 !
     n = mat%rank
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
     transa='N'
     IF(mat%nlherm) THEN
        transa='T'   ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
     END IF
     CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &          beta, yarr)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j))
           END DO
        ELSE
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
           END DO
        END IF
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j)) = yarr(mat%cols(j)) &
                   &              + mat%val(j)*xarr(i)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(wsmp_mat)                :: mat
     DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
     DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE
        matdescra = 'g'
     END IF
 !
     CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
          &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
          &           n, beta, yarr, n)
 #else
     yarr = 0.0d0
     DO i=1,n
        DO j=mat%irow(i), mat%irow(i+1)-1
           yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
        END DO
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_wsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr)
 !
 !   Return product mat*x
 !
     TYPE(zwsmp_mat)             :: mat
     DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
     DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
 !
     DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
     INTEGER :: n, nrhs, i, j
     CHARACTER(len=6) :: matdescra
     CHARACTER(len=1) :: transa
 !
     n = mat%rank
     nrhs = SIZE(xarr,2)
 !
 #ifdef MKL
     IF(mat%nlsym) THEN
        matdescra = 'sun'
     ELSE IF(mat%nlherm) THEN
        matdescra = 'hun'
     ELSE
        matdescra = 'g'
     END IF
     transa='N'
     IF(mat%nlherm) THEN
        transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
     END IF
 !
     CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, &
          &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
          &          beta, yarr, n)
 #else
     yarr = (0.0d0,0.0d0)
     DO i=1,n
        IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:)
           END DO
        ELSE
           DO j=mat%irow(i), mat%irow(i+1)-1
              yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
           END DO
        END IF
        IF(mat%nlsym) THEN
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &                + mat%val(j)*xarr(i,:)
           END DO
        ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
           DO j=mat%irow(i)+1, mat%irow(i+1)-1
              yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
                   &               + mat%val(j)*xarr(i,:)
           END DO
        END IF
     END DO
 #endif
 !
   END FUNCTION vmx_zwsmp_matn
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_wsmp_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(wsmp_mat) :: mat
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
 !  Release memory for factors for symmetric matrix
     IF(mat%nlsym) THEN
        CALL check_mat(mat)
        CALL wsffree
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
     IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
   END SUBROUTINE destroy_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE destroy_zwsmp_mat(mat)
 !
 !  Deallocate the sparse matrix mat
 !
     TYPE(zwsmp_mat) :: mat
 !
     IF(ASSOCIATED(mat%mat)) THEN
        CALL destroy(mat%mat)
        DEALLOCATE(mat%mat)
     END IF
 !
 !  Release memory for factors for symmetric/hermitian matrix
     IF(mat%nlsym .OR. mat%nlherm) THEN
        CALL check_mat(mat)
        CALL wsffree
     END IF
 !
     IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
     IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
     IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
     IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
     IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
     IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
     IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
     IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
   END SUBROUTINE destroy_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_wsmp_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(wsmp_mat)                      :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL creatg(fid, TRIM(label)//'/p')
     CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE put_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE put_zwsmp_mat(fid, label, mat, str)
 !
 !   Write matrix to hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)                    :: fid
     CHARACTER(len=*), INTENT(in)           :: label
     TYPE(zwsmp_mat)                        :: mat
     CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
 !
     IF(PRESENT(str)) THEN
        CALL creatg(fid, label, str)
     ELSE
        CALL creatg(fid, label)
     END IF
     CALL attach(fid, label, 'RANK', mat%rank)
     CALL attach(fid, label, 'NNZ',  mat%nnz)
     CALL attach(fid, label, 'NLSYM', mat%nlsym)
     CALL attach(fid, label, 'NLPOS', mat%nlpos)
     CALL attach(fid, label, 'NLHERM', mat%nlherm)
     CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
     CALL putarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL creatg(fid, TRIM(label)//'/p')
     CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE put_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_wsmp_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)          :: fid
     CHARACTER(len=*), INTENT(in) :: label
     TYPE(wsmp_mat)               :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getatt(fid, label, 'NLPOS', mat%nlpos)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     IF(mat%nlsym) THEN
        CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
        CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
     END IF
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE get_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE get_zwsmp_mat(fid, label, mat)
 !
 !   Read matrix from hdf5 file
 !
     USE futils
 !
     INTEGER, INTENT(in)          :: fid
     CHARACTER(len=*), INTENT(in) :: label
     TYPE(zwsmp_mat)              :: mat
 !
     CALL getatt(fid, label, 'RANK', mat%rank)
     CALL getatt(fid, label, 'NNZ',  mat%nnz)
     CALL getatt(fid, label, 'NLSYM', mat%nlsym)
     CALL getatt(fid, label, 'NLPOS', mat%nlpos)
     CALL getatt(fid, label, 'NLHERM', mat%nlherm)
     CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
     CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
     IF(mat%nlsym) THEN
        CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
        CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
     END IF
     CALL getarr(fid, TRIM(label)//'/val', mat%val)
 !
     CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
     CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
   END SUBROUTINE get_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_wsmp_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(wsmp_mat) :: mata, matb
     INTEGER :: n, nnz
 !
 ! Assume that matb was already initialized by init_wsmp_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     matb%rank = n
     matb%nnz = nnz
     matb%nlsym = mata%nlsym
     matb%nlpos = mata%nlpos
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
     IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
     ALLOCATE(matb%val(nnz)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
     ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
     ALLOCATE(matb%perm(n))
     IF(matb%nlsym) THEN
        ALLOCATE(matb%perm(n))
        ALLOCATE(matb%invp(n))
     END IF
   END SUBROUTINE mcopy_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE mcopy_zwsmp_mat(mata, matb)
 !
 !   Matrix copy: B = A
 !
     TYPE(zwsmp_mat) :: mata, matb
     INTEGER :: n, nnz
 !
 ! Assume that matb was already initialized by init_wsmp_mat.
     IF(matb%rank.LE.0) THEN
        WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
        STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
     END IF
 !
     IF(ASSOCIATED(matb%mat)) THEN 
        CALL destroy(matb%mat)
        DEALLOCATE(matb%mat)
     END IF
 !
     n = mata%rank
     nnz = mata%nnz
     matb%rank = n
     matb%nnz = nnz
     matb%nlsym = mata%nlsym
     matb%nlherm = mata%nlherm
     matb%nlpos = mata%nlpos
     matb%nlforce_zero = mata%nlforce_zero
 !
     IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
     IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
     IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
     IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
     IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
     ALLOCATE(matb%val(nnz)); matb%val = mata%val
     ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
     ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
     ALLOCATE(matb%perm(n))
     IF(matb%nlsym) THEN
        ALLOCATE(matb%perm(n))
        ALLOCATE(matb%invp(n))
     END IF
   END SUBROUTINE mcopy_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_wsmp_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(wsmp_mat)   :: mata, matb
     DOUBLE PRECISION :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb)
 !
 !   A <- A + alpha*B
 !
     TYPE(zwsmp_mat) :: mata, matb
     DOUBLE COMPLEX  :: alpha
 !
     mata%val = mata%val + alpha*matb%val
   END SUBROUTINE maddto_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_wsmp_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(wsmp_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE psum_zwsmp_mat(mat, comm)
 !
 !   Parallel sum of sparse matrices
 !
     INCLUDE "mpif.h"
 !
     TYPE(zwsmp_mat)   :: mat
     INCLUDE 'psum_mat.tpl'
   END SUBROUTINE psum_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(wsmp_mat)            :: mat
     DOUBLE PRECISION, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_wsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm)
 !
 !   Point-to-point combine sparse matrix between 2 processes
 !
     INCLUDE "mpif.h"
 !
     TYPE(zwsmp_mat)           :: mat
     DOUBLE COMPLEX, ALLOCATABLE :: val(:)
     INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
 !
     INCLUDE "p2p_mat.tpl"
   END SUBROUTINE p2p_zwsmp_mat
 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 END MODULE wsmp_bsplines
diff --git a/src/zconmat.tpl b/src/zconmat.tpl
index 2d40218..8662a8f 100644
--- a/src/zconmat.tpl
+++ b/src/zconmat.tpl
@@ -1,214 +1,214 @@
 !>
 !> @file zconmat.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !   In this version s[lines are precalculted
 !   (on all n1/n2 intervals
 !
   INTERFACE
      SUBROUTINE coefeq(x, y, idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x, y
        INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
        DOUBLE COMPLEX, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
   END INTERFACE
   INTEGER, OPTIONAL :: maxder(2)   ! maximum oder of derivatives
   LOGICAL, OPTIONAL :: nat_order   ! Natural ordering for 2d-1d mapping
 !
   INTEGER :: n1, nidbas1, ndim1, n1e
   INTEGER :: n2, nidbas2, ndim2, n2e
   INTEGER :: ng1, ng2
   INTEGER :: i1, i2, ig1, ig2
   INTEGER :: igt1, igt2, igw1, igw2, irow, jcol
   INTEGER, ALLOCATABLE :: left1(:), left2(:)
 !
   LOGICAL :: nlper1, nlper2, nlnat
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER :: k, kmaxder, it1, iw1, it2, iw2
   INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:)  ! Derivative order
   DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0)
   DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:)  ! Terms in weak form
 !
   DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
   DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:,:,:), matc(:,:)
   DOUBLE COMPLEX, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
   CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
   nlper1 = spl%sp1%period
   nlper2 = spl%sp2%period
 !
   n1e = n1+nidbas1  ! Number of elements in 1st coordinate
   n2e = n2+nidbas2  ! Number of elements in 2nd coordinate
   IF(nlper2) n2e = n2
 !
 !   Gauss points and weights on all intervals
 !
   xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
   wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
   ng1 = SIZE(xg1,1)
   xg2 => spl%sp2%gausx
   wg2 => spl%sp2%gausw
   ng2 = SIZE(xg2,1)
 !
 !   Splines on all intervals
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder(1)
   ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
   ALLOCATE(left1(ng1))
   DO i1=1,n1
      left1 = i1
      CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1)
   END DO
   DEALLOCATE(left1)
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder(2)
   ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2))
   ALLOCATE(left2(ng2))
   DO i2=1,n2
      left2 = i2
      CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2)
   END DO
   DEALLOCATE(left2)
 !
 !   Ordering in local to global matrix mapping
 !
   nlnat = .FALSE.
   IF(PRESENT(nat_order)) nlnat = nat_order
 !===========================================================================
 !              2.0 Assembly loop
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms,2))
   ALLOCATE(iderw(kterms,2))
   ALLOCATE(coefs(kterms,ng1,ng2))
 !
 !   Allocate local matrices
 !
   ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2))
   ALLOCATE(matc(ng1,ng2))
   ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2))
   ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
   ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2))
 !
   DO i1=1,n1
      DO i2=1,n2
 !
 !   Coefficients of the weak form
 !
         DO ig1=1,ng1
            DO ig2=1,ng2
               CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), &
                    &      idert, iderw, coefs(:,ig1,ig2))
            END DO
         END DO
 !
 !   Compute local matrix: A <- E*(C*D^T) + A
 !
         mata = 0.0d0
         DO k=1,kterms
 !
            matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2)
 !
            DO it1=0,nidbas1
               DO iw1=0,nidbas1
                  DO ig1=1,ng1
                     matf(it1,iw1,ig1) = wg1(ig1,i1) * &
                          &  fun1(it1,idert(k,1),ig1,i1) * &
                          &  fun1(iw1,iderw(k,1),ig1,i1)
                  END DO
               END DO
            END DO
 !
            DO it2=0,nidbas2
               DO iw2=0,nidbas2
                  DO ig2=1,ng2
                     matg(it2,iw2,ig2) = wg2(ig2,i2) * &
                          &  fun2(it2,idert(k,2),ig2,i2) * &
                          &  fun2(iw2,iderw(k,2),ig2,i2)
                  END DO
               END DO
            END DO
 !
            CALL zgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, one, &
                 &      matc, ng1, matg, (nidbas2+1)*(nidbas2+1), zero, &
                 &      matcg, ng1)
            CALL zgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), &
                 &      ng1, one, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, one, &
                 &      mata, (nidbas1+1)*(nidbas1+1))
 !
         END DO
 !
 !   Map local matrix A to global matrix
 !
         DO it1=0,nidbas1
            igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1
            DO it2=0,nidbas2
               igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1
               irow = glmap(igt1, igt2, n1e, n2e)
               DO iw1=0,nidbas1
                  igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1
                  DO iw2=0,nidbas2
                     igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1
                     jcol = glmap(igw1, igw2, n1e, n2e)
                     CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2))
                  END DO
               END DO
            END DO
         END DO
 !
      END DO
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun1)
   DEALLOCATE(fun2)
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(mata)
   DEALLOCATE(matc)
   DEALLOCATE(matg)
   DEALLOCATE(matcg)
   DEALLOCATE(matf)
 !
 CONTAINS
   INTEGER FUNCTION glmap(i,j,n1,n2)
     INTEGER, INTENT(in) :: i,j,n1,n2
     IF(nlnat) THEN
        glmap = (j-1)*n1 + i
     ELSE
        glmap = (i-1)*n2 + j
     END IF
   END FUNCTION glmap
diff --git a/src/zconmat_1d.tpl b/src/zconmat_1d.tpl
index d066822..2de4621 100644
--- a/src/zconmat_1d.tpl
+++ b/src/zconmat_1d.tpl
@@ -1,144 +1,144 @@
 !>
 !> @file zconmat_1d.tpl
 !>
 !> @brief
 !>
 !> @copyright
 !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 !> SPC (Swiss Plasma Center)
 !>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
 !> the terms of the GNU Lesser General Public License as published by the Free
 !> Software Foundation, either version 3 of the License, or (at your option)
 !> any later version.
 !>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 !>
 !> You should have received a copy of the GNU Lesser General Public License
 !> along with this program. If not, see <https://www.gnu.org/licenses/>.
 !>
 !> @authors
 !> (in alphabetical order)
 !> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 !>
 !
 !   In this version s[lines are precalculted
 !   (on all n1/n2 intervals
 !
   INTERFACE
      SUBROUTINE coefeq(x, idt, idw, c)
        DOUBLE PRECISION, INTENT(in) :: x
        INTEGER, INTENT(out) :: idt(:), idw(:)
        DOUBLE COMPLEX, INTENT(out) :: c(:)
      END SUBROUTINE coefeq
   END INTERFACE
   INTEGER, OPTIONAL :: maxder   ! maximum oder of derivatives
 !
   INTEGER :: n1, nidbas1, ndim1, ng1
   INTEGER :: i1, ig1
   INTEGER :: irow, jcol
   INTEGER, ALLOCATABLE :: left1(:)
 !
   LOGICAL :: nlper1
 !
   INTEGER :: kterms         ! Number of terms in weak form
   INTEGER :: k, kmaxder, it1, iw1
   INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
   DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0)
   DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:)  ! Terms in weak form
 !
   DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:)
   DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
   DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:), matc(:)
   DOUBLE COMPLEX, ALLOCATABLE :: matf(:,:,:)
 !===========================================================================
 !              1.0 Prologue
 !
 !   Properties of spline space
 !
   CALL get_dim(spl, ndim1, n1, nidbas1)
   nlper1 = spl%period
 !
 !   Gauss points and weights on all intervals
 !
   xg1 => spl%gausx  ! xg1(ng1,n1)
   wg1 => spl%gausw  ! wg1(ng1,n1)
   ng1 = SIZE(xg1,1)
 !
 !   Splines on all intervals
 !
   kmaxder = 1
   IF(PRESENT(maxder)) kmaxder = maxder
   ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
   ALLOCATE(left1(ng1))
   DO i1=1,n1
      left1 = i1
      CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1)
   END DO
   DEALLOCATE(left1)
 !===========================================================================
 !              2.0 Assembly loop
 !
 !   Weak form
 !
   kterms = mat%nterms
   ALLOCATE(idert(kterms))
   ALLOCATE(iderw(kterms))
   ALLOCATE(coefs(kterms,ng1))
 !
 !   Allocate local matrices
 !
   ALLOCATE(mata(0:nidbas1,0:nidbas1))
   ALLOCATE(matc(ng1))
   ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
 !
   DO i1=1,n1
 !
 !   Coefficients of the weak form
 !
      DO ig1=1,ng1
         CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1))
      END DO
 !
 !   Compute local matrix: A <- F*c + A
 !
      mata = 0.0d0
      DO k=1,kterms
 !
         matc(1:ng1) = coefs(k,1:ng1)
 !
         DO it1=0,nidbas1
            DO iw1=0,nidbas1
               DO ig1=1,ng1
                  matf(it1,iw1,ig1) = wg1(ig1,i1) * &
                       &  fun1(it1,idert(k),ig1,i1) * &
                       &  fun1(iw1,iderw(k),ig1,i1)
               END DO
            END DO
         END DO
 !
         CALL zgemv('N', (nidbas1+1)*(nidbas1+1), ng1, one, matf, &
              &          (nidbas1+1)*(nidbas1+1), matc, 1, one, mata, 1)
      END DO
 !
 !   Map local matrix A to global matrix
 !
      DO it1=0,nidbas1
         irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1
         DO iw1=0,nidbas1
            jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1
            CALL updtmat(mat, irow, jcol, mata(it1,iw1))
         END DO
      END DO
 !
   END DO
 !===========================================================================
 !              9.0  Epilogue
 !
   DEALLOCATE(fun1)
   DEALLOCATE(idert, iderw, coefs)
   DEALLOCATE(mata)
   DEALLOCATE(matc)
   DEALLOCATE(matf)
diff --git a/wk/CMakeLists.txt b/wk/CMakeLists.txt
index 64a12ba..546792b 100644
--- a/wk/CMakeLists.txt
+++ b/wk/CMakeLists.txt
@@ -1,78 +1,78 @@
 #
 # @file CMakeLists.txt
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Nicolas Richart <nicolas.richart@epfl.ch>
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 set(BS_TESTS
   driv1 driv2 driv3 driv4
   pde1d pde1dp pde1dp_cmpl
   pde2d pde2d_pb
   pde1dp_cmpl_dft
   pde3d
   fit1d fit1dbc fit1dp
   fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y
   moments optim1 optim2 optim3
   tcdsmat tmassmat tbasfun tsparse1 test_kron
   )
 
 if(HAS_PARDISO)
   set(BS_TESTS ${BS_TESTS}
     pde1dp_cmpl_pardiso
     pde2d_pardiso
     pde2d_sym_pardiso
     pde2d_sym_pardiso_dft
     )
 endif()
 
 if(HAS_MUMPS)
   set(BS_TESTS ${BS_TESTS}
     pde2d_mumps
     pde1dp_cmpl_mumps
     )
 endif()
 
 set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh")
 set(BIN_DIR "${bsplines_tests_BINARY_DIR}")
 set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
 
 foreach(prog ${BS_TESTS})
   add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
     ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR}
     )
 endforeach()
 
 # Special cases!
 if(HAS_PARDISO)
   add_test(tsparse2  ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
     ${BIN_DIR}/tsparse2
     )
 endif()
 
 add_test(ppde3d  ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
   ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d.in
   )
 
 add_test(ppde3d_pb ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
   ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d_pb.in
   )
diff --git a/wk/runtest.sh b/wk/runtest.sh
index 810d2bb..d4cc61d 100644
--- a/wk/runtest.sh
+++ b/wk/runtest.sh
@@ -1,37 +1,37 @@
 #
 # @file runtest.sh
 #
 # @brief
 #
 # @copyright
 # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
 # SPC (Swiss Plasma Center)
 #
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
 # the terms of the GNU Lesser General Public License as published by the Free
 # Software Foundation, either version 3 of the License, or (at your option)
 # any later version.
 #
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 # @authors
 # (in alphabetical order)
 # @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
 #
 #!/bin/sh
 
 progname=$1
 input_dir=$2
 
 prog=$(basename ${progname})
 input_file=${input_dir}/${prog}.in
 
 ${progname} < $input_file
 
 exit $?