diff --git a/CMakeLists.txt b/CMakeLists.txt
index c01c54d..aba8597 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,150 +1,150 @@
#
# @file CMakeLists.txt
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
# @author Trach-Minh Tran
#
cmake_minimum_required (VERSION 2.8.8)
#cmake_policy(SET CMP0053 NEW)
project(bsplines_root Fortran C)
#enable_language(Fortran)
enable_testing()
option(BSPLINES_USE_MUMPS "Activate the mumps interface" ON)
if(NOT DEFINED BSPLINES_EXAMPLES)
option(BSPLINES_EXAMPLES "Compiles the examples" ON)
endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake")
# Assume we are on CRAY if ftn is the Fortran compiler
if (${CMAKE_Fortran_COMPILER} MATCHES "ftn$")
set(CRAY TRUE)
if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
set(cray_suffix cray)
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
set(cray_suffix pgi)
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
set(cray_suffix intel)
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
set(cray_suffix gnu)
endif()
else()
set(CRAY FALSE)
endif()
if(POLICY CMP0074)
cmake_policy(SET CMP0074 NEW)
endif()
include(CMakeFlagsHandling)
# Compiler flags for debug/optimization
if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
set(CMAKE_AR ${XIAR})
add_flags(LANG Fortran TYPE DEBUG -traceback "-check bounds" "-warn unused")
add_flags(LANG Fortran TYPE RELEASE -xHost)
elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
add_flags(LANG Fortran TYPE DEBUG -fbounds-check -fbacktrace)
endif()
if(NOT MUMPS)
set(MUMPS $ENV{MUMPS_ROOT})
endif()
# Installation root directory
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(PREFIX $ENV{PREFIX})
if(PREFIX)
set(${CMAKE_INSTALL_PREFIX} ${PREFIX})
else()
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "..." FORCE)
endif()
message(STATUS "CMAKE_INSTALL_PREFIX is " ${CMAKE_INSTALL_PREFIX})
endif()
# Search and load the FUTILS configuration file
if(NOT TARGET futils)
find_package(futils PATHS ${FUTILS}/lib/cmake REQUIRED)
endif()
if(BSPLINES_USE_MUMPS)
find_package(Mumps REQUIRED)
set(HAS_MUMPS ${MUMPS_FOUND})
else()
set(HAS_MUMPS FALSE)
endif()
# Find lapack/blas. Skip it if on CRAY!
if(CRAY)
set(BSPLINES_USE_PARDISO OFF)
endif()
include(blas)
if(NOT BSPLINES_EXPORT_TARGETS)
set(BSPLINES_EXPORT_TARGETS bsplines-targets)
endif()
find_package(MPI COMPONENTS Fortran REQUIRED)
include(GNUInstallDirs)
add_subdirectory(pppack)
add_subdirectory(pputils2)
add_subdirectory(fft)
add_subdirectory(src)
if(HAS_MUMPS AND BSPLINES_EXAMPLES)
add_subdirectory(multigrid)
endif()
if(BSPLINES_EXAMPLES)
add_subdirectory(examples)
add_subdirectory(wk)
endif()
export(TARGETS pppack pputils2 bsplines fft
FILE "${CMAKE_BINARY_DIR}/bsplinesLibraryDepends.cmake")
export(PACKAGE bsplines)
# install configuration files
if(BSPLINES_EXPORT_TARGETS MATCHES "bsplines-targets")
install(EXPORT bsplines-targets
DESTINATION lib/cmake
)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/bsplines-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake @ONLY
)
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake
DESTINATION lib/cmake
)
endif()
# enable packaging with CPack
include(CPack)
diff --git a/cmake/CMakeFlagsHandling.cmake b/cmake/CMakeFlagsHandling.cmake
index d9955fe..ccab47d 100644
--- a/cmake/CMakeFlagsHandling.cmake
+++ b/cmake/CMakeFlagsHandling.cmake
@@ -1,100 +1,100 @@
#
# @file CMakeFlagsHandling.cmake
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
#
if(_CMAKE_FLAGS_HANDLING)
return()
endif()
set(_CMAKE_FLAGS_HANDLING TRUE)
#===============================================================================
# Compilation options handling
#===============================================================================
macro(_get_flags_message lang type desc)
if(${lang} MATCHES "C.." OR ${lang} MATCHES "Fortran")
set(${desc} "Flags used by the compiler")
elseif(${lang} MATCHES ".*_LINKER")
set(${desc} "Flags used by the linker")
endif()
if(${lang} MATCHES "SHARED_LINKER")
set(${desc} "${desc} during the creation of shared libraries")
elseif(${lang} MATCHES "MODULE_LINKER")
set(${desc} "${desc} during the creation of modules")
elseif(${lang} MATCHES "STATIC_LINKER")
set(${desc} "${desc} linker during the creation of static libraries")
endif()
if(${type} MATCHES "ALL")
set(${desc} "${desc} during all build types")
else()
set(${desc} "${desc} during ${type} builds")
endif()
endmacro()
#===============================================================================
function(handle_flags)
include(CMakeParseArguments)
cmake_parse_arguments(_flags
"ADD;REMOVE" "LANG;TYPE" ""
${ARGN}
)
if(NOT _flags_LANG)
set(_flags_LANG ${FLAGS_HANDLING_DEFAULT_LANGUAGE})
endif()
set(_variable CMAKE_${_flags_LANG}_FLAGS)
if (_flags_TYPE)
set(_variable ${_variable}_${_flags_TYPE})
else()
set(_flags_TYPE "ALL")
endif()
_get_flags_message(${_flags_LANG} ${_flags_TYPE} _desc)
foreach(flag ${_flags_UNPARSED_ARGUMENTS})
if (_flags_ADD)
string(REPLACE "${flag}" "match" _temp_var "${${_variable}}")
if(NOT _temp_var MATCHES "match")
set(${_variable} "${flag} ${${_variable}}" CACHE STRING ${_desc} FORCE)
endif()
elseif(_flags_REMOVE)
string(REPLACE "${flag} " "" ${_variable} "${${_variable}}")
set(${_variable} "${${_variable}}" CACHE STRING ${_desc} FORCE)
endif()
endforeach()
endfunction()
#===============================================================================
function(add_flags)
handle_flags(ADD ${ARGN})
endfunction()
#===============================================================================
function(remove_flags)
handle_flags(REMOVE ${ARGN})
endfunction()
#===============================================================================
diff --git a/cmake/CheckFindMumps.c b/cmake/CheckFindMumps.c
index 3b006f7..b402816 100644
--- a/cmake/CheckFindMumps.c
+++ b/cmake/CheckFindMumps.c
@@ -1,105 +1,105 @@
/**
* @file CheckFindMumps.c
*
* @brief
*
* @copyright
* Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
* SPC (Swiss Plasma Center)
*
- * spclibs is free software: you can redistribute it and/or modify it under
+ * SPClibs is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
- * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
* @authors
* (in alphabetical order)
* @author Nicolas Richart
*/
#include
#if !defined(MUMPS_SEQ)
# include
#endif
#define JOB_INIT -1
#define JOB_END -2
#define JOB_COMPLETE 6
#define USE_COMM_WORLD -987654
#define icntl(n) id.icntl[n - 1]
int main(int argc, char **argv) {
int n = 2;
int nz = 2;
int irn[2] = {1, 2};
int jcn[2] = {1, 2};
Real a[2];
Real rhs[2];
#if !defined(MUMPS_SEQ)
MPI_Init(&argc, &argv);
#endif
rhs[0] = 1.0; rhs[1]=4.0;
a[0] = 1.0; a[1] = 2.0;
id.job = JOB_INIT;
id.par = 1;
id.sym = 0;
#if !defined(MUMPS_SEQ)
id.comm_fortran = USE_COMM_WORLD;
#endif
mumps_c(&id);
// Default Scaling
icntl(8) = 77;
// Assembled matrix
icntl(5) = 0;
/// Default centralized dense second member
icntl(20) = 0;
icntl(21) = 0;
// automatic choice for analysis analysis
icntl(28) = 0;
// fully distributed
icntl(18) = 3;
id.n = n;
id.nz_loc = nz;
id.irn_loc = irn;
id.jcn_loc = jcn;
id.a_loc = a;
id.rhs = rhs;
icntl(1) = -1;
icntl(2) = -1;
icntl(3) = -1;
icntl(4) = 0;
id.job = JOB_COMPLETE;
mumps_c(&id);
id.job=JOB_END;
mumps_c(&id);
printf("Solution is : (%8.2f %8.2f)\n", rhs[0], rhs[1]);
return 0;
}
diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake
index 855fa81..7abde8a 100644
--- a/cmake/FindFFTW.cmake
+++ b/cmake/FindFFTW.cmake
@@ -1,46 +1,46 @@
#
# @file FindFFTW.cmake
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
#
if(CMAKE_C_COMPILER_ID MATCHES "Cray")
set(_cray TRUE)
endif()
# Find FFTW2
if (${_cray})
# set(FFTW_LIBRARY "-ldfftw")
set(FFTW_LIBRARY "${FFTW_DIR}/libdfftw.a")
else()
find_library(FFTW_LIBRARY NAMES fftw PATHS ${FFTW}/lib)
find_library(FFTW_LIBRARY NAMES fftw3 PATHS ${FFTW}/lib)
find_path(FFTW_INCLUDES fftw_f77.h ${FFTW}/include)
find_path(FFTW_INCLUDES fftw_f77.i ${FFTW}/include)
find_path(FFTW_INCLUDES fftw.h ${FFTW}/include)
find_path(FFTW_INCLUDES fftw3.h ${FFTW}/include)
endif()
mark_as_advanced(FFTW_LIBRARY FFTW_INCLUDES)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARY)
diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake
index 879b0a9..f194320 100644
--- a/cmake/FindMETIS.cmake
+++ b/cmake/FindMETIS.cmake
@@ -1,62 +1,62 @@
#
# @file FindMETIS.cmake
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
#
find_path(METIS_INCLUDE_DIR metis.h
PATHS "${METIS_DIR}"
ENV METIS_DIR
PATH_SUFFIXES include
)
find_library(METIS_LIBRARY NAMES metis
PATHS "${METIS_DIR}"
ENV METIS_DIR
PATH_SUFFIXES lib
)
mark_as_advanced(METIS_LIBRARY METIS_INCLUDE_DIR)
#===============================================================================
include(FindPackageHandleStandardArgs)
if(CMAKE_VERSION VERSION_GREATER 2.8.12)
if(METIS_INCLUDE_DIR)
file(STRINGS ${METIS_INCLUDE_DIR}/metis.h _versions
REGEX "^#define\ +METIS_VER_(MAJOR|MINOR|SUBMINOR) .*")
foreach(_ver ${_versions})
string(REGEX MATCH "METIS_VER_(MAJOR|MINOR|SUBMINOR) *([0-9.]+)" _tmp "${_ver}")
set(_metis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
endforeach()
set(METIS_VERSION "${_metis_MAJOR}.${_metis_MINOR}" CACHE INTERNAL "")
endif()
find_package_handle_standard_args(METIS
REQUIRED_VARS
METIS_LIBRARY
METIS_INCLUDE_DIR
VERSION_VAR
METIS_VERSION)
else()
find_package_handle_standard_args(METIS DEFAULT_MSG
METIS_LIBRARY METIS_INCLUDE_DIR)
endif()
diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake
index 67cfc35..27a5299 100644
--- a/cmake/FindPETSc.cmake
+++ b/cmake/FindPETSc.cmake
@@ -1,92 +1,92 @@
#
# @file FindPETSc.cmake
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
#
# - Try to find PETSc
# PETSC_FOUND - system has PETSc
# PETSC_INCLUDE_DIRS - the PETSc include directories
# PETSC_LIBRARIES - Link these to use PETSc
# PETSC_VERSION - Version string (MAJOR.MINOR.SUBMINOR)
if(PETSc_FIND_REQUIRED)
find_package(PkgConfig REQUIRED)
else()
find_package(PkgConfig QUIET)
if(NOT PKG_CONFIG_FOUND)
return()
endif()
endif()
pkg_search_module(_petsc PETSc)
# Some debug code
#get_property(_vars DIRECTORY PROPERTY VARIABLES)
#foreach(_var ${_vars})
# if ("${_var}" MATCHES "^_petsc")
# message("${_var} -> ${${_var}}")
# endif()
#endforeach()
if(_petsc_FOUND AND _petsc_VERSION)
set(PETSC_VERSION ${_petsc_VERSION})
endif()
if(_petsc_FOUND)
set(_petsc_libs)
foreach(_lib ${_petsc_LIBRARIES})
string(TOUPPER "${_lib}" _u_lib)
find_library(PETSC_LIBRARY_${_u_lib} ${_lib} PATHS ${_petsc_LIBRARY_DIRS})
list(APPEND _petsc_libs ${PETSC_LIBRARY_${_u_lib}})
mark_as_advanced(PETSC_LIBRARY_${_u_lib})
endforeach()
if (NOT _petsc_INCLUDE_DIRS)
pkg_get_variable(_petsc_INCLUDE_DIRS ${_petsc_MODULE_NAME} includedir)
#message(${_petsc_INCLUDE_DIRS})
endif()
find_path(PETSC_Fortran_INCLUDE_DIRS "finclude/petsc.h"
PATHS ${_petsc_INCLUDE_DIRS}/petsc
NO_CMAKE_PATH
NO_DEFAULT_PATH
)
set(PETSC_LIBRARIES ${_petsc_libs} CACHE FILEPATH "")
set(PETSC_INCLUDE_DIRS ${_petsc_INCLUDE_DIRS} CACHE PATH "")
add_library(petsc::petsc INTERFACE IMPORTED)
set_property(TARGET petsc::petsc PROPERTY INTERFACE_LINK_LIBRARIES ${PETSC_LIBRARIES})
set_property(TARGET petsc::petsc PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_INCLUDE_DIRS})
add_library(petsc::petscf INTERFACE IMPORTED)
target_link_libraries(petsc::petscf INTERFACE petsc::petsc)
set_property(TARGET petsc::petscf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_Fortran_INCLUDE_DIRS})
endif()
include (FindPackageHandleStandardArgs)
find_package_handle_standard_args(PETSc
REQUIRED_VARS PETSC_LIBRARIES PETSC_INCLUDE_DIRS
VERSION_VAR PETSC_VERSION)
diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake
index 1f0c0df..f0c5425 100644
--- a/cmake/FindParMETIS.cmake
+++ b/cmake/FindParMETIS.cmake
@@ -1,62 +1,62 @@
#
# @file FindParMETIS.cmake
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
#
find_path(PARMETIS_INCLUDE_DIR parmetis.h
PATHS "${PARMETIS_DIR}"
ENV PARMETIS_DIR
PATH_SUFFIXES include
)
find_library(PARMETIS_LIBRARY NAMES parmetis
PATHS "${PARMETIS_DIR}"
ENV PARMETIS_DIR
PATH_SUFFIXES lib
)
mark_as_advanced(PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR)
#===============================================================================
include(FindPackageHandleStandardArgs)
if(CMAKE_VERSION VERSION_GREATER 2.8.12)
if(PARMETIS_INCLUDE_DIR)
file(STRINGS ${PARMETIS_INCLUDE_DIR}/parmetis.h _versions
REGEX "^#define\ +PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION .*")
foreach(_ver ${_versions})
string(REGEX MATCH "PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION *([0-9.]+)" _tmp "${_ver}")
set(_parmetis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
endforeach()
set(PARMETIS_VERSION "${_parmetis_MAJOR}.${_parmetis_MINOR}" CACHE INTERNAL "")
endif()
find_package_handle_standard_args(ParMETIS
REQUIRED_VARS
PARMETIS_LIBRARY
PARMETIS_INCLUDE_DIR
VERSION_VAR
PARMETIS_VERSION)
else()
find_package_handle_standard_args(ParMETIS DEFAULT_MSG
PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR)
endif()
diff --git a/cmake/blas.cmake b/cmake/blas.cmake
index f4ef4d5..00fa014 100644
--- a/cmake/blas.cmake
+++ b/cmake/blas.cmake
@@ -1,63 +1,63 @@
#
# @file blas.cmake
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
#
set(_default_blas $ENV{BLA_VENDOR})
if(NOT _default_blas)
set(_default_blas All)
endif()
set(BSPLINES_USE_BLAS_VENDOR "${_default_blas}" CACHE STRING "Version of blas to use")
mark_as_advanced(BSPLINES_USE_BLAS_VENDOR)
set_property(CACHE BSPLINES_USE_BLAS_VENDOR PROPERTY STRINGS
All
ACML
ACML_GPU
ACML_MP
ATLAS
Apple
CXML
DXML
Generic
Goto
IBMESSL
Intel
Intel10_32
Intel10_64lp
Intel10_64lp_seq
NAS
OpenBLAS
PhiPACK
SCSL
SGIMATH
SunPerf
)
if(BSPLINES_USE_PARDISO)
set(BSPLINES_USE_BLAS_VENDOR Intel10_64lp CACHE STRING "" INTERNAL)
endif()
set(ENV{BLA_VENDOR} ${BSPLINES_USE_BLAS_VENDOR})
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)
diff --git a/cmake/bsplines-config.cmake.in b/cmake/bsplines-config.cmake.in
index 31605c5..bf7ad55 100644
--- a/cmake/bsplines-config.cmake.in
+++ b/cmake/bsplines-config.cmake.in
@@ -1,54 +1,54 @@
#
# @file bsplines-config.cmake.in
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
# - Config file for the BSPLINES package
# It defines the target the following variables:
# FFLAGS - Fortran compile flags
# BSPLINES_MODS - include directories for bsplines modules
# BSPLINES_LIBS - bsplines library
# BSPLINES_EXTRA_INCS - additional include directories
# BSPLINES_EXTRA_LIBS - additional libraries
# HAS_PARDISO - BSPLINES built with PARDISO
# HAS_MUMPS - BSPLINES built with MUMPS
# MPIEXEC - MPI launcher
# MPIEXEC_NUMPROC_FLAG - Number of MPI processes flag
# Compute paths
get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_prefix "${_dir}/../.." ABSOLUTE)
# Import the targets
include("${_prefix}/lib/cmake/bsplines-targets.cmake")
# Report other information
set(FFLAGS "@CMAKE_Fortran_FLAGS@")
set(BSPLINES_MODS "${_prefix}/include")
set(BSPLINES_LIBS fft bsplines pppack pputils2)
set(BSPLINES_EXTRA_INCS "@EXTRA_INCS@")
set(BSPLINES_EXTRA_LIBS "@EXTRA_LIBS@")
set(HAS_PARDISO "@HAS_PARDISO@")
set(HAS_MUMPS "@HAS_MUMPS@")
set(MPIEXEC "@MPIEXEC@")
set(MPIEXEC_NUMPROC_FLAG "@MPIEXEC_NUMPROC_FLAG@")
diff --git a/docs/manual/Makefile b/docs/manual/Makefile
index 206a081..f77ff93 100644
--- a/docs/manual/Makefile
+++ b/docs/manual/Makefile
@@ -1,59 +1,59 @@
#
# @file bsplines.tex
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
dvi: bsplines.dvi
pdf: bsplines.pdf
ps: bsplines.ps
.SUFFIXES:
.SUFFIXES: .sgml .html .tex .dvi .pdf .ps .txt
.tex.dvi:
latex $<
@while ( grep "Rerun to get cross-references" \
${<:tex=log} > /dev/null ); do \
latex $<; \
done
latex $<
.dvi.pdf:
dvipdf $<
.dvi.ps:
dvips $<
bsplines.dvi: bsplines.tex driv1.eps fit.eps
solvers.dvi: solvers.tex
dirichlet_2d.dvi: dirichlet_2d.tex
using_bsplines.dvi: using_bsplines.tex
clean:
rm -f *~ *.dvi *.log *.aux *.out *~ *.toc *.flc *.bbl *.blg
distclean: clean
rm -f bsplines.ps
diff --git a/docs/manual/bsplines.tex b/docs/manual/bsplines.tex
index ecc06a9..9b21e88 100644
--- a/docs/manual/bsplines.tex
+++ b/docs/manual/bsplines.tex
@@ -1,1297 +1,1297 @@
%
% @file bsplines.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
% @author Stephan Brunner
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
%\usepackage{a4wide}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
%\usepackage{fancybox}
%\usepackage[notref]{showkeys}
\title{\tt BSPLINES Reference Guide}
\author{Trach-Minh Tran, Stephan Brunner, Kurt Appert}
\date{v0.3, February 2012}
\abstract{Generalized splines of any order on irregular grids for
interpolation and solving PDEs with FEM.}
\begin{document}
\maketitle
\tableofcontents
\section{Properties of Splines}
In this section, several properties of splines will be shown in
a more or less rigorous way. The aim is mainly to provide a
minimum mathematical background for using the module \texttt{BSPLINES} for the
\emph{interpolation} problem as well as the \emph{Finite Element Method}
to solve PDEs. More rigorous mathematical proofs can be found
in the book by de Boor~\cite{deBoor}.
\subsection{Recurrence Relation}
We start by defining a finite interval $[a,b]$ subdivided into $N_x$
intervals:
\begin{equation}
a=t_0 \le t_1 \le \ldots \le t_{N_x}=b.
\end{equation}
The sequence $t_i, i=0,\ldots,N_x$ can be irregularly spaced. The $j^{th}$
spline of degree $p$ defined on this sequence of grid points (also called
\textbf{knots}), is denoted by $\Lambda_{j}^{p}$ and can be constructed using
the following recurrence relation. Starting with the \emph{constant} spline
\begin{equation}
\Lambda_i^0(x) =
\begin{cases}
1& \text{if $t_i \le x < t_{i+1}$}, \\
0& \text{otherwise}.
\end{cases}
\end{equation}
the splines of degree $p>0$ for $t_i \le x < t_{i+1}$ can be constructed from
\begin{eqnarray}
\Lambda_i^p &=& w_i^p\Lambda_i^{p-1} + (1-
w_{i+1}^p)\Lambda_{i+1}^{p-1}, \label{eq:recRel}\\
w_{i}^{p} &=& \frac{x-t_i}{t_{i+p}-t_i}.
\end{eqnarray}
Thus the values of all \emph{non-zero}
splines up to degree $p$ in the interval $[t_i,t_{i+1}]$ fit into
the triangular array as shown in Fig.~\ref{fig:allSpl}. Starting from the
first column with $\Lambda_i^0=1$, one can compute each of the $p+1$ entries
in a subsequent column with Eq.~(\ref{eq:recRel}). Applying this procedure
to generate splines on every intervals $[t_i,t_{i+1}], i=0,\ldots,N_{x}-1$
would produce the sequence of $N_{x}+p$ splines of degree $p$:
\( \Lambda_{-p}^{p},\ldots, \Lambda_{N_x-1}^{p}\).
\subsection{Support and positivity}
The linear spline
\begin{equation*}
\Lambda_i^1 = w_i^1\Lambda_i^0 + (1-w_{i+1}^1)\Lambda_{i+1}^{0}
=\frac{x-t_i}{t_{i+1}-t_i}\Lambda_i^0 +
\frac{t_{i+2}-x}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0
\end{equation*}
consists of 2 \emph{linear pieces} on $[t_i,t_{i+2}]$, forming a $C^0$
function which breaks at $t_{i+1}$ and vanishes outside of this
interval. Likewise, the quadratic spline
\begin{eqnarray*}
\Lambda_i^2 &=& w_i^2\Lambda_i^1 + (1-w_{i+1}^2)\Lambda_{i+1}^{1} \\
&=& w^2_i w^1_i \Lambda_i^0 + [w^2_i(1-w^1_{i+1})+w^1_{i+1}(1-w^2_{i+1})]
\Lambda_{i+1}^0 + (1-w^2_{i+1})(1-w^1_{i+2})\Lambda_{i+2}^0
\end{eqnarray*}
consists of 3 \emph{parabolic pieces} on $[t_i,t_{i+3}]$ that
join to form a $C^1$ function which breaks at $t_{i+1}$ and $t_{i+2}$
and vanishes outside of this interval. In general the spline of
degree $p$ can be expressed as:
\begin{equation}
\Lambda^p_i = \sum_{r=0}^{p}\, b_{i+r}^p\Lambda_{i+r}^0
\end{equation}
where $b_{i+r}^p$ is a sum of products of $p$ linear functions, resulting
in $p+1$ polynomials of degree $p$, joining to form
a $C^{p-1}$ function which breaks at $t_i,\ldots,t_{i+p+1}$
and vanishes outside of the \emph{support} $[t_i,t_{i+p+1}]$. From its construction,
$\Lambda^p_i$ is clearly \emph{strictly positive} on the interior of $[t_i,t_{i+p+1}]$.
\begin{equation}
\Lambda^p_i (x) > 0, \qquad t_i1$:
\[ \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1, \]
or that the sum of the next to last column in Fig.~\ref{fig:allSpl} is $1$,
we have, using the recurrence relation (\ref{eq:recRel})
\begin{eqnarray*}
\sum_{j=i-p}^{i}\,\Lambda_j^{p} &=& \sum_{j=i-p}^{i}\,
\left( w^p_j\Lambda_j^{p-1} +(1-w^p_{j+1})\Lambda_{j+1}^{p-1} \right) \\
&=& \sum_{j=i-p+1}^{i}\,w^p_j\Lambda_j^{p-1} +
\sum_{j=i-p+1}^{i}\,(1-w^p_j)\Lambda_j^{p-1} \\
&=& \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1.
\end{eqnarray*}
\subsection{Derivative of Splines}
The derivative of the splines of degree $p$ can be expressed in terms of the
splines of degree $p-1$ by the following relation:
\begin{equation}
\label{derivative of splines}
\frac{d}{dx}\Lambda_i^p =
p\left(
\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
- \frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
\right).
\end{equation}
A straightforward consequence of this relation is that the splines of order
$p$ are $C^{p-1}$ continuous. The demonstration of Eq.(\ref{derivative of
splines}) is done by induction. One starts with the case $p=1$:
\begin{eqnarray*}
\frac{d}{dx}\Lambda_i^1
& = &
\frac{d}{dx}\left[
w_i^1\Lambda_i^0 + (1-w^1_{i+1})\Lambda_{i+1}^0
\right]
\\
& = &
\frac{d\,w_i^1}{dx}\Lambda_i^0 + \frac{d\,(1-w^1_{i+1})}{dx}\Lambda_{i+1}^0
+ w_i^1\frac{d\,\Lambda_i^0}{dx} + (1-w^1_{i+1})\frac{d\,\Lambda_{i+1}^0}{dx}
\\
& = &
\frac{1}{t_{i+1}-t_i}\Lambda_i^0 - \frac{1}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0,
\end{eqnarray*}
having used Eq.(\ref{eq:recRel}) and $d\,\Lambda_i^0/dx = 0$. One then assumes
Eq.(\ref{derivative of splines}) true for $p-1$ and demonstrates that it
remains true for $p$. This is done as follows:
\begin{eqnarray}
\label{demo deriv. 1}
\frac{d}{dx}\Lambda_i^p
& = &
\frac{d}{dx}\left[
w_i^p\Lambda_i^{p-1} + (1-w^p_{i+1})\Lambda_{i+1}^{p-1}
\right]
\\
\nonumber
& = &
\frac{d\,w_i^p}{dx}\Lambda_i^{p-1}
+ \frac{d\,(1-w^p_{i+1})}{dx}\Lambda_{i+1}^{p-1}
+ w_i^p\frac{d\,\Lambda_i^{p-1}}{dx}
+ (1-w^p_{i+1})\frac{d\,\Lambda_{i+1}^{p-1}}{dx}
\\
\nonumber
& = &
\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
\\
\label{demo deriv. 2}
&&
+ w_i^p (p-1)\left(
\frac{\Lambda_i^{p-2}}{t_{i+p-1}-t_i}
- \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}}
\right)
+ (1-w_{i+1}^p) (p-1)\left(
\frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}}
- \frac{\Lambda_{i+2}^{p-2}}{t_{i+p+1}-t_{i+2}}
\right)
\end{eqnarray}
having used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 1}), and the
induction hypothesis to obtain Eq.(\ref{demo deriv. 2}). Now, rearranging the
last two terms of Eq.(\ref{demo deriv. 2}), one easily obtains:
\begin{eqnarray}
\nonumber
\frac{d}{dx}\Lambda_i^p
& = &
\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
\\
\nonumber
&&
+(p-1)\left[
\frac{1}{t_{i+p}-t_i}
\left(
\frac{x-t_i}{t_{i+p-1}-t_i}\Lambda_i^{p-2}
+\frac{t_{i+p}-x}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2}
\right)
\right.
\\
\nonumber
&&
\hspace{2.cm}
\left.
-\frac{1}{t_{i+p+1}-t_{i+1}}
\left(
\frac{x-t_{i+1}}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2}
+\frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+2}}\Lambda_{i+2}^{p-2}
\right)
\right]
\\
\label{demo deriv. 3}
& = &
\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+ (p-1)\left(
\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
\right)
\\
\nonumber
& = &
p\left(
\frac{\Lambda_i^{p-1} }{t_{i+p}-t_i}
-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
\right)
\end{eqnarray}
having again used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 3}). This
completes the demonstration of relation (\ref{derivative of splines}).
\subsection{Integrals of Splines}
With the proper normalization all splines of all degrees have unitary surface:
\begin{equation}
\label{integrals of splines}
\frac{p+1}{t_{i+p+1}-t_i}\int \Lambda_i^p(x)dx = 1.
\end{equation}
This relation holds trivially for $p=0$ and $p=1$. A recursive proof of the
general statement (\ref{integrals of splines}) starts assuming
\begin{equation}
\label{previousInt}
\frac{p}{t_{i+p}-t_i}\int \Lambda_i^{p-1}(x)dx = 1
\end{equation}
to be true. Then using Eq.(\ref{derivative of splines}) multiplied by $x$ and integrating one obtains:
\begin{equation}
\nonumber
\int x \frac{d}{dx}\Lambda_i^p dx = -\int \Lambda_i^p dx =
p\int\left( \frac{x\Lambda_i^{p-1}}{t_{i+p}-t_i}
-\frac{x\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
\right)dx.
\end{equation}
Completing the fractions in the big parentheses in view of using
Eq.(\ref{eq:recRel}) one has
\begin{eqnarray}
\nonumber
\int \Lambda_i^p dx
& = &
-\, p\int \frac{x-t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx
- p\int \frac{t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx \\
\nonumber
& &
-\, p\int \frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx
+ p\int \frac{t_{i+p+1}}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx,
\end{eqnarray}
where the first and the third terms on the right correspond to $-p\int\Lambda_i^p dx $,
Eq.(\ref{eq:recRel}), and can be combined with the left side to yield
\begin{equation}
\label{proofIntegrals}
(1+p)\int\Lambda_i^p dx = t_{i+p+1}-t_i,
\end{equation}
where relation (\ref{previousInt}) has been used for the rest on the right hand side.
This concludes the proof of Eq.(\ref{integrals of splines}).
\subsection{Boundary Conditions}
Applying the recurrence relation to generate \emph{all} the splines on
the finite domain $[t_0,t_{N_x}]$ yields the $N_x+p$ splines
of degree $p$:
\begin{equation}
\Lambda^p_{-p},\Lambda^p_{-p+1},\ldots, \Lambda^p_{N_x-1}. \label{eq:splSeq}
\end{equation}
Note that \emph{additional} knots beyond both ends of $[t_0,t_{N_x}]$ have to
be defined to generate all these splines.
\subsubsection{Periodic splines}
The extra knots are simply defined through periodicity.:
\begin{eqnarray}
t_{-\nu} &=& t_{N_x-\nu}-(b-a), \\
t_{N_{x}+\nu} &=& t_{\nu}+(b-a), \qquad \nu=0,\ldots,p.
\end{eqnarray}
The $p+1$ leftmost splines in (\ref{eq:splSeq}) are thus identical to the rightmost splines:
\begin{equation}
\Lambda^p_{-\nu} = \Lambda^p_{N_x-\nu}, \qquad \nu=0,\ldots,p.
\end{equation}
\subsubsection{Non-periodic splines}
The choice made in \texttt{BSPLINES} is simply:
\begin{equation}
t_{-p} = \cdots = t_{0} = a, \qquad b=t_{N_x}=\cdots=t_{N_x+p}.
\end{equation}
Thus in the first interval $[t_0,t_1]$, the first spline $\Lambda^p_{-p}$
is constructed (refer to the first entry on each of the column of
Fig.~\ref{fig:allSpl}, with $i=0$) as follow:
\begin{eqnarray*}
\Lambda^1_{-1} &=& (1-w^1_{0})\Lambda^0_{0} = \frac{t_{1}-x}{t_{1}-t_{0}}\Lambda^0_{0}\\
\Lambda^2_{-2} &=& (1-w^2_{-1})\Lambda^1_{-1} =\frac{t_{1}-x}{t_{1}-t_{-1}}\Lambda^1_{-1}
=\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{2}\Lambda^0_{0}\\
\cdot & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\
\Lambda^p_{-p} &=& (1-w^p_{-p+1})\Lambda^p_{-p+1} =\frac{t_{1}-x}{t_{1}-t_{-p+1}}\Lambda^{p-1}_{-p+1}
=\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{p}\Lambda^0_{0}
\end{eqnarray*}
In the same manner, the generation of the \emph{last} spline $\Lambda^p_{N_x-1}$ (last entry
on each of the column of Fig.~\ref{fig:allSpl}, with $i=N_{x}-1$) yields:
\begin{eqnarray*}
\Lambda^1_{N_x-1} &=& w^1_{N_x-1}\Lambda^0_{N_x-1} =
\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\Lambda^0_{N_x-1} \\
\Lambda^2_{N_x-1} &=& w^2_{N_x-1}\Lambda^1_{N_x-1} =
\frac{x-t_{N_x-1}}{t_{N_x+1}-t_{N_x-1}}\Lambda^1_{N_x-1}
= \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{2}\Lambda^0_{N_x-1}\\
\cdot & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\
\Lambda^p_{N_x-1} &=& w^p_{N_x-1}\Lambda^p_{N_x-1} =
\frac{x-t_{N_x-1}}{t_{N_x+p-1}-t_{N_x-1}}\Lambda^1_{N_x-1}
= \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{p}\Lambda^0_{N_x-1}
\end{eqnarray*}
Since the sum of all splines is 1 and using the positivity of splines, all the
non-periodic splines, except the first (last) spline should vanish at $x=a$ ($x=b$):
\begin{equation}
\Lambda^p_{r}(a) = \delta_{r,-p}, \qquad\Lambda^p_{r}(b) = \delta_{r,N_x-1}
\end{equation}
The spline derivatives at the boundaries $x=a$ and $x=b$ can be
derived using Eq.(\ref{derivative of splines}) as follow. At $x=a$
(interval $[t_0,t_1]$), by noting that only the spline
$\Lambda^{p-1}_{-p+1}$ is non-zero at $x=a$ (see next to last column
of Fig.{\ref{fig:allSpl}, with $i=0$), it is easy to see that
there are only 2 non-zero derivatives
given by
\begin{equation}
\begin{split}
\frac{d}{dx}\Lambda^p_{-p}(a) =&
-\frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p}} = -\frac{p}{t_1-t_0}, \\
\frac{d}{dx}\Lambda^p_{-p+1}(a) =&
\frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p+1}} = \frac{p}{t_1-t_0},
\end{split}
\end{equation}
where we have used $t_0=t_{-1}=\ldots=t_{-p}=a$. Likewise, the 2
non-zero derivatives of spline at the other boundary $x=b$ are
\begin{equation}
\begin{split}
\frac{d}{dx}\Lambda^p_{N_x-2}(b) =&
-\frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = -\frac{p}{t_{N_x}-t_{N_x-1}}, \\
\frac{d}{dx}\Lambda^p_{N_x-1}(b) =&
\frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = \frac{p}{t_{N_x}-t_{N_x-1}},
\end{split}
\end{equation}
where we have used $t_{N_x}=t_{N_x+1}=\ldots=t_{N_x+p}=b$.
\subsubsection{Spline expansion}
In summary, the approximation of a function $f$ defined
in the interval $[a,b]$ using a basis (\textbf{Is this obvious?) }of
splines of degree $p$ associated with the sequence of knots
$t_i, i=-p,\ldots,N_{x}+p$
can be written as
\begin{equation}
f(x) = \sum_{j=-p}^{N_x-1}\, c_j\Lambda^p_j(x), \qquad
\begin{array}{l}
\mbox{support of $\Lambda^p_j$:}\quad [t_{j},t_{j+p+1}],\\
t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i-p}(x),\ldots,
\Lambda^p_{i}(x) \ge 0.
\end{array}
\end{equation}
Note that the \emph{last} spline in the interval $[t_i,t_{i+1}]$, which can be written as
\[ \Lambda^p_{i}(x)=w^p_i(x) \Lambda^{p-1}_{i}(x)=\ldots=w^p_i(x) w^{p-1}_i(x) \ldots
w^{1}_i(x)\Lambda^{0}_{i}(x) \]
\emph{vanishes at the knot} $x=t_i$. Thus at any position $x$, the sum
involves $p+1$ terms except at the knots $t_i$ where there are only $p$ terms.
It is sometimes more convenient to renumber the spline index $j$ so that it
starts from $0$. With this new numbering, the spline expansion becomes
\begin{equation}
f(x) = \sum_{j=0}^{N_x+p-1}\, c_j\Lambda^p_j(x), \qquad
\begin{array}{l}
\mbox{support of $\Lambda^p_j$:}\quad [t_{j-p},t_{j+1}], \\
t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i}(x),\ldots,
\Lambda^p_{i+p}(x) \ge 0.
\end{array}
\label{eq:splExp}
\end{equation}
In the \emph{periodic} case, there are $N_{x}$ \emph{independent}
spline coefficients since
\begin{equation}
c_{N_{x}+\nu} = c_{\nu}, \qquad \nu=0,\ldots,p-1. \label{eq:perSp}
\end{equation}
In the \emph{non-periodic} case,
the first and the last spline coefficients $c_{0},\,c_{N_x+p-1}$ are
respectively the values of $f$ at the end points $a$ and $b$.
The basis functions for both non-periodic and periodic cubic splines
($p=3$) are shown in Fig~.\ref{fig:cubic_splines} where this new numbering is
used.
\begin{figure}[htbp]
\centering
\includegraphics[angle=0,width=\hsize]{driv1}
\caption{The basis of non-periodic and periodic cubic splines. The periodic
splines $\Lambda_{10}$, $\Lambda_{11}$, $\Lambda_{12}$ denote the same
splines as $\Lambda_{0}$, $\Lambda_{1}$, $\Lambda_{2}$ respectively. }
\label{fig:cubic_splines}
\end{figure}
\subsection{Spline Initialization with \texttt{SET\_SPLINE}}
The initialization of a spline is performed by calling the routine
\texttt{SET\_SPLINE}, passing the desired degree $p$ and the
sequence of grid points (or knots) $t_j, j=0,\ldots,N_x$. If
Gauss points on each of the intervals $[t_j,t_{j+1}]$ are needed, a non-zero
value of \texttt{NGAUSS} should be specified. The other input
argument is the \emph{optional} \texttt{LOGICAL} argument \texttt{PERIOD}
to define the periodicity of the splines. By default it is \texttt{.FALSE.}.
The routine returns the 1d spline \texttt{SP} which is of type
\texttt{TYPE(spline1d)}:
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE set_spline(p, ngauss, grid, sp, period)
INTEGER, INTENT(in) :: p, ngauss
DOUBLE PRECISION, INTENT(in) :: grid(:)
LOGICAL, OPTIONAL, INTENT(in) :: period
TYPE(spline1d), INTENT(out) :: sp
LOGICAL, OPTIONAL, INTENT(in) :: period
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
Besides the main characteristics of the spline (degree $p$ of splines,
number of grid intervals, dimension of splines $N_x+p$, etc.)
the following quantities will be determined and stored in \texttt{SP}:
\begin{itemize}
\item values and all the $p$ derivatives of the $p+1$ non-vanishing splines
on each knots $t_j$. These quantities will be used to speed up computation
of the spline expansion (\ref{eq:splExp}).
\item integrals of splines \(I_i=\int\Lambda_i(x)\,dx\).
\end{itemize}
For a 2d spline
\begin{equation}
\Lambda^{p+q}_{ij}(x,y) = \Lambda^p_i(x)\Lambda^q_j(y),
\end{equation}
on a 2d structured mesh defined by the grid points
\texttt{grid1(0:N1), grid2(0:N2)}, the same call as in the 1d case can be
used, except that the scalars \texttt{p, ngauss, period} become 2 element arrays and
the output \texttt{SP} is now of type \texttt{TYPE(spline2d)}:
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER :: p(2), ngauss(2)
LOGICAL, OPTIONAL :: period(2)
DOUBLE PRECISION, dimension(:) :: grid1, grid2
TYPE(spline2d) :: sp2d
...
CALL set_spline(p, ngauss, grid1, grid2, sp2d, period)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
The derived type \texttt{spline2d} is a \emph{wrapper} of 2 \texttt{spline1d}
objects which can be accessed through \texttt{sp2d\%sp1} and \texttt{sp2d\%sp2}.
Once \texttt{SET\_SPLINE} is called, the routine \texttt{GET\_DIM} can be
called to inquire the spline's essential characteristics such as dimension,
number of intervals and degree, for both 1d and 2d splines: \par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE get_dim(sp, dim, nx, nidbas)
TYPE(spline1d), INTENT(in) :: sp
INTEGER, INTENT(out) :: dim
INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
Integral of function $\int^b_a\,f(x)dx$ is computed from its spline
\texttt{sp} and splines coefficients in:
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
DOUBLE PRECISION FUNCTION fintg(sp, c)
TYPE(spline1d), INTENT(in) :: sp
DOUBLE PRECISION, INTENT(in) :: c(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
For a 2d functions, the same function should be called with a 2d spline
\texttt{sp} and 2d array $c$.
Finally \texttt{DESTROY\_SP(sp)} should be called when a spline
\texttt{sp} is not needed anymore to clean up memory space.
\subsection{Generating Splines with \texttt{DEF\_BASFUN}}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE def_basfun(xp, sp, fun, left)
DOUBLE PRECISION, INTENT(in) :: xp
TYPE(spline1d), INTENT(in) :: sp
DOUBLE PRECISION, INTENT(out) :: fun(:,:)
INTEGER, OPTIONAL, INTENT(out) :: left
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
This routine computes, for a given point $\mbox{xp}\in [t_0,t_{N_x}]$, the value
and optionally the $m$ derivatives of the $p+1$ splines \texttt{sp} which were previously
defined and returns them in \texttt{fun(1:p+1,1:m+1)} with $m\leq p$. The maximum
number of computed derivatives $m$ is determined by the size of the second dimension
of the array \texttt{fun}. The subroutine will return the optional integer
\texttt{left} defined such that:
\[
t_{\mbox{left}} \leq xp < t_{\mbox{left+1}}, \qquad 0\leq \mbox{left} \leq N_{x.-1}.
\]
\subsection{Example 1: Values and derivatives of all splines}
In this example, we first initialize a cubic spline with
the knot sequence $t_0,\ldots,t_{N_x}$ with \texttt{SET\_SPLINE}
and then call \texttt{DEF\_BASFUN} to compute its values, first and second
derivatives on the mesh points \texttt{xp(1:npts)}.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
USE BSPLINES
INTEGER, PARAMETER :: nx=10, npts=100
DOUBLE PRECISION :: t(0:nx), xp(npts)
DOUBLE PRECISION, ALLOCATABLE :: fxp0(:,:), fxp1(:,:), fxp2(:,:)
DOUBLE PRECISION :: fun(4,3) ! 4 cubic splines at a given xp
! plus first and second derivatives.
INTEGER :: i, dim, left
TYPE(spline1d) :: sp
!
! Define t(0:nx), xp(npts)
!
CALL set_spline(3, 0, t, sp, period=.FALSE.)
CALL get_dim(sp, dim)
ALLOCATE(fxp0(npts,0:dim-1), fxp1(npts,0:dim-1), fxp2(npts,0:dim-1)
fxp0 = 0.0
fxp1 = 0.0
fxp2 = 0.0
DO i=1,npts
CALL def_basfun(xp(i), sp, fun, left=left)
fxp0(i, left:left+3) = fun(1:4, 1) ! Value
fxp1(i, left:left+3) = fun(1:4, 2) ! 1st derivative
fxp2(i, left:left+3) = fun(1:4, 3) ! 2nd derivative
END DO
DEALLOCATE(fxp0, fxp1, fxp2)
CALL destroy_sp(sp)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
This code fragment will store \texttt{dim=nx+3=13} splines
and theirs first 2 derivatives in \texttt{fxp0}, \texttt{fxp1} and
\texttt{fxp2}. Change the \texttt{period} to \texttt{.TRUE.} to obtain
\emph{periodic} splines.
\section{Spline Interpolation}
Given the interval $[a,b]$ discretized into $\{x_k,\,k=0,\ldots,N_g\}$ with
$x_0=a$ and $x_{N_g}=b$, the problem of interpolating $f(x), x\in [a,b]$ with
splines of degree $p$ is to solve the following equations
for the spline coefficients $c_i$:
\begin{equation}
\sum_{i=0}^{N_x+p-1}\,c_i\Lambda^p_{i}(x_k) = f(x_k),\quad k=0,\ldots,N_g.
\label{eq:intEq}
\end{equation}
The sequence of knots $t_0,\ldots,t_{N_x}$ defines completely the splines
$\Lambda^p_i$ and its choice will be described in the following section.
\subsection{Choice of knots}
If Eqs.~(\ref{eq:intEq}) are the only conditions for our interpolation
problem, the number of equations should match the number
of unknowns $c_i$. The number of knot intervals $N_x$ hence has to verify
\begin{equation}
N_x = N_g-p+1.\label{eq:knotNum}
\end{equation}
For the \emph{periodic} case, taking into account the $p$ periodic spline conditions
(\ref{eq:perSp}) on
$c_i$ and $f(a)=f(b)$, this condition reduces to:
\begin{equation}
N_x = N_g.
\end{equation}
For \emph{odd} values of the spline degree $p$, the knots $t_i$ could be placed on
the \emph{interpolation sites} $x_k$ while when $p$ is even, $t_i$ should not
be on $x_k$ to avoid a \emph{badly conditioned} linear system when solving
Eq.~(\ref{eq:intEq}). This leads to the following choice for $t_i$ in
\texttt{BSPLINES}:
\subsubsection{Periodic splines}
The number of knots $N_x+1$ is \emph{equal} to the number of interpolation points
$N_g+1$ with
\begin{equation}
t_i =
\begin{cases}
x_i & \text{$p$ odd} \\
(x_{i-1}+x_i)/2 & \text{$p$ even}
\end{cases}
,\qquad i=0,\ldots,N_x
\end{equation}
\subsubsection{Non-Periodic splines}
In order to satisfy the equality (\ref{eq:knotNum}), first, the 2 end points
are retained as knots:
\begin{equation}
t_0=x_0, \qquad t_{N_x} = x_{N_g}.
\end{equation}
For even $p$, the first $p/2$ interpolation intervals are \emph{skipped}:
\begin{equation}
t_i = (x_{i+p/2-1} + x_{i+p/2})/2, \qquad i=1,\ldots,N_x-1, \label{eq:evenKnots}
\end{equation}
while for odd $p$, $(p-1)/2$ interpolation points are \emph{skipped}:
\begin{equation}
t_i = x_{i+(p-1)/2 }, \qquad i=1,\ldots,N_x-1. \label{eq:oddKnots}
\end{equation}
Instead of skipping grid points, an alternative would be to supplement the
system of equations (\ref{eq:intEq}) with conditions on derivatives of $f(x)$
at one or both ends of $[a,b]$. This type of boundary conditions is not
implemented in the present version of the \texttt{BSPLINES} module.
\subsection{The collocation matrix}
The \emph{collocation matrix} $\Lambda^p_i(x_k)$ of the interpolation problem
(\ref{eq:intEq}) is a square matrix. Each row has at most $p+1$ non-zero
terms. Let us consider separately the non-periodic and the periodic cases.
\subsubsection{The non-periodic case}
\paragraph{Even spline degree}
From (\ref{eq:evenKnots}), there are $p/2+1$ interpolation points \(x_{0}, \ldots, x_{p/2}\)
in the first knot interval $[t_0,t_1)$. Since there are at most $p+1$ non-zero
splines for any points in each interval (except for $x_0$ where $\Lambda_i(x_0)=\delta_{i,0}$,
the collocation matrix starts as:
\begin{equation}
\left(\begin{array}{llllll}
\Lambda_0(x_{0}) & 0 &\cdots & \cdots & \cdots &\cdots \\
\Lambda_0(x_{1}) &\Lambda_1(x_{1}) &\cdots &\Lambda_p(x_{1}) & 0 &\cdots \\
\vdots &\vdots &\cdots &\vdots & 0 &\cdots \\
\Lambda_0(x_{p/2})&\Lambda_1(x_{p/2}) &\cdots &\Lambda_p(x_{p/2}) & 0 &\cdots \\
0 &\Lambda_1(x_{p/2+1})&\cdots &\Lambda_p(x_{p/2+1})& \Lambda_{p+1}(x_{p/2+1})&0 \\
0 &\ddots &\ddots &\ddots & \ddots &\ddots
\end{array}\right)
\end{equation}
The number of \emph{upper-diagonals} (non including the diagonal) is
obviously determined by the second row of the matrix above, which yields $p-1$.
Since the knot placement is identical for both ends of the interpolation mesh,
the matrix
$\Lambda_i(x_k)$ is \emph{banded} with half-bandwidths
\begin{equation}
kl=ku=p-1
\end{equation}
\paragraph{Odd spline degree}
Applying the same procedure, it is straightforward to show for $p$ odd and
from (\ref{eq:oddKnots}), that $x_0,\ldots.x_{(p-1)/2}$ are located in the
first knot interval $[t_0,t_1)$ and that the matrix has again the same
half-bandwidths as in the even $p$ case.
The resulting interpolation problem can then be solved with
the usual \emph{banded matrix factorization} followed by a \emph{back-solve}
phase.
\subsubsection{The periodic case}
Let consider the matrix for $p=3$ and $N_x=10$ (see lower figure of
Fig.~(\ref{fig:cubic_splines}):
\begin{equation}
\left(\begin{array}{llllll}
\Lambda_0(x_{0}) & \Lambda_1(x_{0}) & \Lambda_2(x_{0}) & 0 & \cdots & \\
0 & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & \Lambda_3(x_{1}) & 0 &\cdots \\
\vdots & \ddots & \ddots & \ddots & \ddots &\ddots \\
0 & \cdots & 0 & \Lambda_7(x_{7}) & \Lambda_8(x_{7}) & \Lambda_9(x_{7}) \\
\Lambda_0(x_{8}) & 0 & 0 & \cdots &\Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\
\Lambda_0(x_{9}) &\Lambda_1(x_{9}) & 0 & 0 & \cdots & \Lambda_9(x_{9})
\end{array}\right) \label{eq:perMat}
\end{equation}
The matrix is ``almost triangular'' (except for the last 2 rows) and is not
\emph{diagonally dominant}! A more satisfactory (and symmetric in shape) matrix is
however obtained by simply renumbering the splines such that the sequence
starts with $-\lfloor p/2 \rfloor$ instead of $0$. This renumbered splines
are shown in Fig.~\ref{fig:fitSpl} for the cubic
and quadratic periodic splines. With this renumbering, the matrix
(\ref{eq:perMat}) has a more symmetric shape and is diagonally dominant:
\begin{equation}
\left(\begin{array}{lllll}
\Lambda_0(x_{0}) & \Lambda_1(x_{0}) & 0 & \cdots & \Lambda_9(x_{0}) \\
\Lambda_0(x_{1}) & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & 0 &\cdots \\
\vdots & \ddots & \ddots & \ddots &\ddots \\
0 & \cdots & \Lambda_7(x_{8}) & \Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\
\Lambda_0(x_{9}) & 0 & 0 & \Lambda_8(x_{9}) & \Lambda_9(x_{9})
\end{array}\right) \label{eq:perMatnew}
\end{equation}
In general, for arbitrary $p$ (even and odd values), the collocation matrix
$A=\Lambda_j(x_i)$ can be written as
\begin{equation}
A = B + UV^T
\end{equation}
where $B$ is a banded matrix with half-bandwidths $kl=ku=b=\lfloor p/2\rfloor$ and
rank $N_x$. $U$ and $V$ are $N_x\times 2b$ sparse matrices:
\begin{equation}
U = \left(
\begin{matrix}
I & 0 \\
0 & 0 \\
0 & I
\end{matrix}\right), \qquad
V = \left(
\begin{matrix}
0 & D^T \\
0 & 0 \\
C^T & 0
\end{matrix}\right), \qquad
V^T = \left(
\begin{matrix}
0 & 0 & C \\
D & 0 & 0
\end{matrix}\right), \qquad
\end{equation}
where $C$, $D$ are the $b\times b$ \emph{off-band} sub-matrices and $I$, the
identity matrix. In the cubic spline example considered above, the
\emph{off-band} matrices are simply $1\times 1$ matrices with
$C=\Lambda_9(x_0)$ and $D=\Lambda_0(x_9)$.
The inverse of $A$ can be deduced from the \emph{Sherman-Morrison-Woodbury formula} \cite{Golub}:
\begin{eqnarray*}
A^{-1} &=& B^{-1} - B^{-1}U(1+V^{T}B^{-1}U)^{-1}V^{T}B^{-1} \\
&=& B^{-1} - ZW^{T}B^{-1},
\end{eqnarray*}
where
\begin{eqnarray*}
Z &=& B^{-1}U, \\
H &=& 1+V^{T}B^{-1}U \\
W^T &=& H^{-1}V^{T}.
\end{eqnarray*}
The solution of the interpolation problem $Ax=f$ can then be reduced to a
\emph{factorization} and a \emph{back-solve} phase:
\begin{enumerate}
\item Factorization
\begin{enumerate}
\item Factor: \( B \longleftarrow L_BU_B \)
\item Solve: \( (L_BU_B)Z = U, \quad U\longleftarrow Z \)
\item Compute: \( H = 1+V^{T}Z \)
\item Factor: \( H=L_HU_H \)
\item Solve: \( (L_HU_H)W^{T} = V^{T}, \quad V^{T}\longleftarrow W^{T} \)
\end{enumerate}
\item Back-solve
\begin{enumerate}
\item Solve: \( (L_BU_B)y = f \)
\item Compute: \( t = W^{T}y \)
\item Compute: \( x = y - Zt \)
\end{enumerate}
\end{enumerate}
At the end of the factorization, only the (updated) matrices $B$, $U$ and
$V^{T}$, required in the back-solve phase, need to be saved.
Note that we avoid to store the product
$ZW^T$ because it is a \emph{big} $N_x\times N_x$ matrix.
After the \emph{back-solve} step, the solution $x$ is \emph{shifted back} (by
$\lfloor p/2\rfloor$) and the appropriate periodicity condition is applied to
obtain the spline coefficients $c_j,\, j=0,\ldots,N_x+p-1$, as defined in
(\ref{eq:splExp}).
\begin{figure}[htbp]
\centering
\includegraphics[angle=0,width=\hsize]{fit}
\caption{The periodic cubic and quadratic splines used
for interpolation. The spline knots are indicated by \emph{blue full circles} and
the interpolation points, by \emph{dashed vertical lines} }
\label{fig:fitSpl}
\end{figure}
\subsection{\texttt{PP} representation}
The computation of $f(x)$ using directly the spline expansion Eq.~(\ref{eq:splExp}) can
be costly, because of the evaluation of the splines $\Lambda^p_j(x)$,
especially when interpolating on large number of points. Expanding $f(x)$,
using truncated Taylor series in each interval $[t_\mu,t_{\mu+1}]$, we obtain
the following \emph{Piecewise Polynomial Function} representation (or
\texttt{ppform}) of $f(x)$:
\begin{equation}
f(x) = \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k, \quad t_\mu\leq x.
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\documentclass[a4paper]{article}
\usepackage{amsmath}
\title{\tt Some Notes on Boundary Conditions}
\author{Trach-Minh Tran}
\date{March 2012}
\begin{document}
\maketitle
\section{Neumann BC as an {essential} BC}
The original equation:
\begin{equation}
\mathbf{A \cdot u} = \mathbf{b}
\end{equation}
with the Neumann BC (1D case):
\begin{equation}
\alpha u_1 + \beta u_2 = c.
\end{equation}
From Eq.(20) of \cite{BSPLINES}:
\begin{equation}
\beta = -\alpha =\frac{p}{\Delta_1}
\end{equation}
where $p$ is the degree of spline and $\Delta_1$ is the lenght of the
first insterval.
Transformation $(u_1, \ldots, u_n) \Rightarrow (\hat u_1, \ldots, \hat
u_n)$ defined by
\begin{equation}
\begin{array}{ccc}
\alpha u_1 + \beta u_2 = \hat u_1 & & u_1 = \frac{1}{\alpha}\hat u_1 - \frac{\beta}{\alpha}\hat u_2 \\
u_2 = \hat u_2 & & u_2 = \hat u_2 \\
\vdots & \Longrightarrow & \vdots \\
u_N = \hat u_N & & u_N = \hat u_N.
\end{array}
\end{equation}
The original Neumann BC becomes now a \emph{inhomogeneous Dirichlet}
BC on $\mathbf{\hat u}$:
\begin{equation}
\hat u_1 = c.
\end{equation}
The transformed linear system can be written as:
\begin{equation}
\mathbf{(U^T\cdot A \cdot U)\cdot \hat u} = \mathbf{U^T\cdot b},
\end{equation}
where $\mathbf{U}$ is given by
\begin{equation}
\mathbf{U} =
\left(\begin{matrix}
\frac{1}{\alpha} & -\frac{\beta}{\alpha} & \dots & 0 \\
0 & 1 & \dots & 0 \\
& & \ddots& \vdots \\
0 & 0 & \dots & 1
\end{matrix}\right)
\end{equation}
Thus, all the symmetry, hermiticity or positivity properties of the
original matrix are preserved with this matrix transformation!
\section{Neumann BC as a \emph{natural} BC}
Multiplying the 1D Sturm-Liouville equation (see section 1.1.1 of
\cite{SOLVERS}) by spline $\Lambda_j(x)$ and integrating by parts, we
obtain the following boundary terms:
\begin{equation}
-\Lambda_j(L) C_1(L) \phi'(L) + \Lambda_j(0) C_1(0) \phi'(0)
\end{equation}
To impose $\phi'(0) = a$ and noting that $\Lambda_j(0)=\delta_{j1}$,
you only need to add $[-aC_1(0)]$ to the first element of the
RHS. Likewise, for the BC $\phi'(L) = b$ you only need to add
$[bC_1(L)]$ to the last element of the RHS. No matrix manipulation (as
for the \emph{essential} BC) is required! Notice that if $a$ or $b$ is
zero, nothing needs to be done to impose these BC. That's the reason why
it is called \emph{natural} BC!
A subtle point to be noted here is that using \emph{natural} BC,
$\phi'(0)$ \emph{is not} exaclty equal to $a$, althought it should
converge to $a$ as $(\Delta x)^p$ where $p$ is the spline degree,
while using the \emph{essential} BC, $\phi'(0)=a$ is \emph{exact}!
\section{Diffusion Equation using second order time implicit method}
Let rewrite Eq.(74) of your notes in vector form and replace the
unkowns $n$ by $f$:
\begin{equation}
\mathbf{B} \frac{d \mathbf{f}}{dt} = \mathbf{M\cdot f}.
\end{equation}
Using a \emph{second order time centered} discretization,
\begin{equation}
\begin{split}
\mathbf{B} \left(\frac{\mathbf{f}^{n+1}-\mathbf{f}^{n}}{\Delta t}\right) &=
\mathbf{M} \left(\frac{\mathbf{f}^{n+1}+\mathbf{f}^{n}}{2}\right) \\
\Rightarrow &
\left(\mathbf{B} -\frac{\Delta t}{2}
\mathbf{M}\right)\mathbf{f}^{n+1} =
\left(\mathbf{B} +\frac{\Delta t}{2}
\mathbf{M}\right)\mathbf{f}^{n}
\end{split}
\end{equation}
\emph{Essential} BC has to be imposed on the matrix
\begin{equation}
\mathbf{B} -\frac{\Delta t}{2} \mathbf{M}
\end{equation}
while \emph{natural} BC is introduced while deriving the weak form
leading to the matrix $M$.
This method is \emph{unconditionnaly stable} and second order in
time. When linear splines are used for the space discretization, this
scheme is similar to the well-known \emph{Cranck-Nicolson} (see for
example Wikipedia) discretization for parabolic PDE.
\begin{thebibliography}{99}
\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
\bibitem{SOLVERS} {\tt The SOLVERS in BSPLINES} Reference Guide.
\end{thebibliography}
\end{document}
diff --git a/docs/manual/solvers.tex b/docs/manual/solvers.tex
index 8abdf13..5c4bbf8 100644
--- a/docs/manual/solvers.tex
+++ b/docs/manual/solvers.tex
@@ -1,2818 +1,2818 @@
%
% @file solvers.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
%\usepackage{verbatim}
%\usepackage[notref]{showkeys}
\title{\tt The Solvers in BSPLINES}
\author{Trach-Minh Tran}
\date{v0.6, December 2011}
\abstract{Implementation of a common simple interface to popular
solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to
provide an easy access to these packages in order to solve elliptic and
parabolic as well as some types of integro-differential
equations.}
\begin{document}
\maketitle
\tableofcontents
\section{Matrix form of the problem}
\subsection{Getting started}
\subsubsection{A one-dimensional problem}
Let us start with the one-dimensional Sturm-Liouville differential
equation:
\begin{equation*}
-\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi = \rho,
\end{equation*}
on the domain $0\leq x \leq L$ with suitable boundary conditions.
On a grid with $N$ intervals, the discretized solution $\phi$, using
the splines $\Lambda_i(x)$ of order $p$ can be written as
\begin{equation}
\label{sol1d}
\phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x),
\end{equation}
where \cite{BSPLINES}
\begin{equation*}
d =
\begin{cases}
N & \text{if $\phi$ is periodic}, \\
N+p & \text{otherwise},
\end{cases}
\end{equation*}
and $\phi_i$ are the unknowns of the following matrix equation:
\begin{equation}
\label{matEq1d}
\sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1.
\end{equation}
Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are
respectively given by:
\begin{equation}
\begin{split}
\label{matCoef1d}
A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'}
+ \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\
\rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\
\end{split}
\end{equation}
For more general differential operators, the matrix coefficients $A_{ii'}$ can be
written as a sum of contributing matrices of the form
\begin{equation}
\label{mat1d}
A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'},
\end{equation}
where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$
derivative of $\Lambda_{i}$.
As the splines $\Lambda_i$ have a support of $p+1$ intervals, the
matrix is sparse and its
coefficients $A_{ii'}$ are non-zero only for $|i-i'| \leq p$: hence the
matrix has a band structure of bandwidth equal to $2p+1$
if the operator is purely differential. For an
integral equation such as
\begin{equation*}
\int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x),
\end{equation*}
the discretization results in a \emph{dense} matrix of the form
\begin{equation}
\label{matIntg1d}
A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx'
K(x,x')\Lambda_{i'}(x').
\end{equation}
Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the
matrix $A_{ii'}$ is a \emph{dyadic}:
\begin{equation}
A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx
V(x)\Lambda_{i'}(x) = U_iV_{i'}.
\end{equation}
\subsubsection{Periodic boundary conditions}
The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This
property can be easily enforced while constructing both $\rho_i$
and the matrix $A_{ii'}$. This results in a solution
$\phi_i$ which is also $N$-periodic.
\subsubsection{Non-periodic boundary conditions}
In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the
boundaries $x=0$ and $x=L$:
\begin{equation}
\Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1},
\end{equation}
which imply that, using (\ref{sol1d})
\begin{equation}
\phi(0) = \phi_0, \qquad \phi(L) = \phi_{N+p-1}.
\end{equation}
It is thus possible to impose the Dirichlet boundary conditions
by a simple modification of the matrix $A_{ii'}$ as shown in
Appendix~\ref{DirichletCond}.
\subsection{Problems in more dimensions}
\subsubsection{Two-dimensional equations}
The results obtained above can be extended in a
straightforward manner. Assuming, for example a
\emph{polar like} $(r,\theta)$ coordinate system,
with the discretized solution and the right-hand side
written as:
\begin{equation}
\label{discreteEq2d}
\begin{split}
\phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
\phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\
\rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta)
\Lambda_i(r) \Lambda_j(\theta),\\
\end{split}
\end{equation}
where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is
\begin{equation}
\sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}
A_{iji'j'}\phi_{i'j'} = \rho_{ij},
\end{equation}
with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the
form:
\begin{equation}
\label{mat2d}
A_{iji'j'} = \int_{0}^{R}\!dr \int_{0}^{2\pi}\!d\theta\,
C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
\Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta).
\end{equation}
\subsubsection{Three-dimensional equations}
Likewise, for the three-dimension case, assuming for example a
\emph{toroidal like} $(r,\theta, \varphi)$ coordinate system,
with the discretized solution and the right-hand side
written as:
\begin{equation}
\label{discreteEq3d}
\begin{split}
\phi(r,\theta, \varphi) &=
\sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
\sum_{k=0}^{N_\varphi-1}
\phi_{ijk}\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi) \\
\rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta
\int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi)
\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\
\end{split}
\end{equation}
where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is
\begin{equation}
\sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1}
A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk},
\end{equation}
with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the
form:
\begin{equation}
\label{mat3d}
A_{ijki'j'k'} = \int_{0}^{R}\!dr
\int_{0}^{2\pi}\!d\theta\ \int_{0}^{2\pi}\!d\varphi\,
C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
\Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta)
\Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi).
\end{equation}
\subsubsection{Unicity condition}
In the case of the polar coordinates $(r,\theta)$ considered above,
the unicity condition on the axis $r=0$ should be imposed. It can be
enforced by modifications of the matrix $A$ as described in
Appendix~\ref{unicityCond}.
\subsubsection{One-dimensional numbering}
For two-dimensional (three-dimensional) problems, the solution
$\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$
($\rho_{ijk}$) can be conveniently casted into one-dimensional
arrays. As an example, by numbering first the last index, we obtain the
following mappings:
\begin{equation}
\label{map1d}
\mu =
\begin{cases}
j + iN_\theta & \text{two-dimensional case} \\
k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\
\end{cases}
\end{equation}
Using such a one-dimensional numbering, the matrix equation for the two and
three dimensional cases takes a more conventional form:
\begin{equation}
\sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu,
\end{equation}
with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and
$r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the
matrix $A_{\mu\mu'}$ has a band structure of bandwidth
$b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is
important to note that, except for the one-dimensional problem, there
are \emph{many} zeros inside the matrix band!
\section{The module {\tt MATRIX}}
\subsection{Interface}
The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the
matrix equation formulated in the previous section, using the direct
solvers of LAPACK. The different matrix storage formats are defined,
using the Fortran derived datatypes. The different types in the
present version are listed in Table~\ref{matTypes}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}\hline
{\tt gemat} & General dense matrix \\
{\tt gbmat} & General band matrix \\
{\tt pbmat} & Symmetric positive-definite band matrix \\
{\tt periodic\_mat} & Matrix obtained for example in \\
& one-dimensional periodic problems\\
\hline
\end{tabular}
\caption{The matrix types}
\label{matTypes}
\end{table}
These types define {\tt DOUBLE PRECISION} real matrices.
{\tt DOUBLE COMPLEX} matrices are declared by prefixing
these types with the letter ``{\tt z}'', for example {\tt zgbmat}.
Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite
complex matrix.
The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}.
Note that the routine {\tt updtmat} is mainly used for the matrix assembly
while {\tt getxxx} and {\tt putxxx} are rather used to modify the
matrix, for example to impose boundary conditions.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt determinant} & Returns the matrix determinant \\
\hline
{\tt factor} & Computes the LU (Cholesky for symmetric/hermitian \\
& matrix) factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt MATRIX} module}
\label{matRoutines}
\end{table}
The complete description of each routine is given in
Appendix~\ref{matRef}. More information on how to use it can be obtained by
{\tt greping} its name on the examples found in the {\tt examples/} directory.
\subsection{A two-dimensional example}
\label{twodEx}
Let's consider the Poisson equation using the cylindrical coordinates
$(r,\theta)$:
\begin{equation}
-\frac{1}{r}\frac{\partial}{\partial r}
\left[r\frac{\partial\phi}{\partial r}\right]
-\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2} = \rho,
\qquad \phi(r=1,\theta) = 0.
\end{equation}
Assuming the exact solution
\begin{equation*}
\phi(r,\theta) = (1-r^2)r^m\cos m\theta,
\end{equation*}
the right-hand-side becomes
\begin{equation*}
\rho=4(m+1)r^{m}\cos m\theta.
\end{equation*}
The matrix and the right hand-side of the discretized problem are computed as
\begin{equation}
\begin{split}
A_{iji'j'} &= \int_{0}^1\!dr \int_{0}^{2\pi}\!d\theta\,\left[
r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\,
\Lambda_{j}(\theta)\Lambda_{j'}(\theta) +
\frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\,
\Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\
\rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta)
\,\,\Lambda_i(r) \Lambda_j(\theta).
\end{split}
\end{equation}
In the example {\tt pde2d.f90} this problem is treated in detail.
In the following, only the calls to the {\tt MATRIX} routines
are reviewed to show how the matrix problem is solved using the {\tt
MATRIX} module.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
!
! Declare a General Band matrix
!
USE matrix
USE conmat_mod
TYPE(gbmat) :: mat
!
! Rank and bandwidth. nidbas(1) is the spline order in
! the first dimension r.
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
nterms = 2 ! Number of terms in the weak form
!
! Initialize the matrix data structure
!
CALL init(kl, ku, nrank, nterms, mat)
!
! Construct the matrix, using 2D spline splxy
! and impose boundary conditions
!
CALL conmat(splxy, mat, coefeq_poisson)
CALL ibcmat(mat, ny)
!
! Compute the RHS, using the 2D spline splxy
! and impose boundary conditions
!
CALL disrhs(mbess, splxy, rhs)
CALL ibcrhs(rhs, ny)
!
! Factor the matrix and solve
!
CALL factor(mat)
CALL bsolve(mat, rhs, sol)
!
...
CONTAINS
SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = x
idt(1,1) = 1; idt(1,2) = 0
idw(1,1) = 1; idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0; idt(2,2) = 1
idw(2,1) = 0; idw(2,2) = 1
END SUBROUTINE coefeq_poisson
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
Some explanations and remarks:
\begin{itemize}
\item The matrix construction is performed by {\tt conmat} which will
be described later in section \ref{secCONMAT}. The
\emph{weak form} is defined in the \emph{internal}
procedure {\tt coefeq\_poisson} and passed as an argument to
{\tt conmat}. See section \ref{secCONMAT} for a detailed
description of the variables {\tt c, idt, idw} returned by
{\tt coefeq\_poisson}.
\item Boundary conditions are imposed by modifications
of the matrix in subroutine {\tt ibcmat} (see file {\tt the
ibcmat.f90}), using the {\tt MATRIX}
routines {\tt getrow, putrow, getcol, putcol}.
\item The construction of the right-hand-side in {\tt disrhs} (see the
file {\tt disrhs.f90}) is computed using a Gauss quadrature..
\item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in
this example is symmetric and positive-definite!) requires only a few
modifications of the program (see the complete example {\tt
pde2d\_pb.f90}):
\begin{itemize}
\item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations
\item Change the list of arguments in the routine {\tt init} to
({\tt ku, nrank, nterms, mat})
\item Small changes in the boundary conditions ({\tt ibcmat} and {\tt
ibcrhs}) to take into account the symmetry.
\end{itemize}
\item The module {\tt MATRIX} can be used independently of {\tt
BSPLINES} (which is used here only to compute the matrix and
right-hand-side), for example in a problem discretized using
Finite Differences.
\end{itemize}
\section{Sparse matrix storage}
Using the \emph{band matrix format} for a pure differential operator
requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the
two-dimensional problem as shown in section 1, while there are only
$(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional
problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for
$(2p_r+1)^3$ non-zero elements.
In order to reduce the matrix storage, a solution consists of just
storing the matrix non-zero elements and use the \emph{Sparse Direct
Solvers}. With an optimal \emph{renumbering} strategy (or
\emph{fill-in reducing ordering}), the
size of the factored matrix can be expected to be smaller than the
corresponding band matrix.
An alternative is to use \emph{iterative} methods which usually need
less memory.
Such a sparse matrix is implemented in the {\tt SPARSE} module where
each matrix row is represented by a \emph{\tt linked} list of elements
with sorted column index. The data structure of this sparse matrix is
wrapped up in a the Fortran data type {\tt spmat} for a real matrix and
{\tt zspmat} for a complex matrix. Most of the generic routines which
are already defined in the {\tt MATRIX} module are overloaded for these
matrix types. They are listed in Table~\ref{spmatRoutines}. The complete
documentation of these routines can be found in Appendix~\ref{spmatRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt spmat, zspmat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
\hline
{\tt get\_count} & Get the number of non-zero elements in matrix \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt SPARSE} module}
\label{spmatRoutines}
\end{table}
It should be noted that this module is \emph{not} used
directly in solver problems. One usually uses instead modules which
are specific to a type of (direct or iterative) solver. As will be
shown in the next section, it is the routines in this solver module which
directly calls the routines defined in the {\tt SPARSE} module during the matrix
assembly.
\section{Solvers using the module {\tt SPARSE}}
All the solvers discussed in this section use initially the module
{\tt SPARSE} to construct the sparse matrix. Once this construction
procedure is complete, this matrix is converted to the (usually more
efficient) format used by the solver.
In a time-dependent simulation where the problem matrix
changes but not the sparsity pattern, the subsequent matrix assembly
will be performed directly on this solver's format.
Thus for example, the first time {\tt updtmat} is called
on a new matrix, it is the version from {\tt SPARSE}. Next, if
{\tt updtmat} is called again to modify the matrix, it will be the
solver's version, unless the matrix is re-initialized by a call to
{\tt destroy} followed by {\tt init}. This switch is completely
transparent for the user as shown through an example in the next
section.
\subsection{The PARDISO direct solver}
The interface to PARDISO~\cite{PARDISO} is implemented in the
module {\tt PARDISO\_BSPLINES}.
The matrix type (symmetric, hermitian, positive-definite) is set
by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
generic routine {\tt init}. All the other generic routines defined in
the module {MATRIX}, plus routines specific to the sparse solver,
are listed in Table~\ref{pardisoRoutines}. The complete documentation
of these routines is given in Appendix~\ref{pardisoRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt clear\_mat}& Set the matrix elements to zero.\\
{\tt psum\_mat} & Parallel sum of matrices \\
{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
{\tt get\_count}& Get the number of non-zero elements in matrix \\
\hline
{\tt factor} & Factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
{\tt to\_mat} & Convert to PARDISO CSR matrix format \\
{\tt reord\_mat}& Reordering and symbolic factorization \\
{\tt numfact} & Numerical factorization \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt PARDISO\_BSPLINES} module}
\label{pardisoRoutines}
\end{table}
Below, a complete example solving a simple
two-dimensional Poisson discretized by the 5 point Finite Difference
method illustrates how to use the {\tt PARDISO\_BSPLINES} module.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
PROGRAM main
USE pardiso_bsplines
IMPLICIT NONE
TYPE(pardiso_mat) :: amat
DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
INTEGER :: nx=5, ny=4
INTEGER :: n, nnz
INTEGER :: i, j, irow, jcol
!
WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
READ(*,*) nx, ny
n = nx*ny ! Rank of the matrix
ALLOCATE(rhs(n))
ALLOCATE(sol(n))
ALLOCATE(arow(n))
!
CALL init(n, 1, amat, nlsym=.TRUE.) ! Pardiso mat, symmetric case
!
! Construct the matrix and RHS
!
DO j=1,ny
DO i=1,nx
arow = 0.0d0
irow = numb(i,j)
arow(irow) = 4.0d0
IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0
IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0
IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
CALL putrow(amat, irow, arow)
rhs(irow) = SUM(arow) ! => the exact solution is 1
END DO
END DO
!
WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
!
! Factor the amat matrix (Reordering, symbolic and numerical factorization)
!
CALL factor(amat)
!
! Back solve
!
CALL bsolve(amat, rhs, sol)
!
! Check solutions
!
WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
!
! Clean up
!
DEALLOCATE(rhs)
DEALLOCATE(sol)
DEALLOCATE(arow)
CALL destroy(amat)
CONTAINS
INTEGER FUNCTION numb(i,j)
!
! One-dimensional numbering
! Number first x then y
!
INTEGER, INTENT(in) :: i, j
numb = (j-1)*nx + i
END FUNCTION numb
END PROGRAM main
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
It should be noted that
\begin{itemize}
\item The routine {\tt putrow} in the matrix construction loop uses the
version from the {\tt SPARSE} module to create dynamically the
matrix row using the linked list.
\item The routine {\tt factor} calls successively the matrix conversion
{\tt to\_mat}, the reordering and symbolic factorization routine
{\tt reord\_mat} and finally the numerical factorization {\tt
numfact}. One could indeed call these three routines separately
instead of the single call to {\tt factor},
\item After solving the linear system, if the matrix is modified by
calling for example {\tt putrow} again, it will modify
directly the converted matrix and not on the {\tt spmat} matrix
which is anyway \emph{destroyed} at the end of {\tt to\_mat}.
\item If the matrix sparsity changes, the matrix should be
re-initialized by calling the {\tt destroy} and {\tt init} routines.
\end{itemize}
Other examples can be found by running ``{\tt grep pardiso\_mat}''
on the F90 files in the directory {\tt examples/}.
\subsection{The WSMP direct solver}
The interface to WSMP~\cite{WSMP} is implemented in the
module {\tt WSMP\_BSPLINES}.
The matrix type (symmetric, hermitian, positive-definite) is set
by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
generic routine {\tt init}. All the other generic routines defined in
the module {MATRIX}, plus routines specific to the sparse solver,
are listed in Table~\ref{wsmpRoutines}. The complete documentation
of these routines is given in Appendix~\ref{wsmpRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt clear\_mat}& Set the matrix elements to zero.\\
{\tt psum\_mat} & Parallel sum of matrices \\
{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
{\tt get\_count}& Get the number of non-zero elements in matrix \\
\hline
{\tt factor} & Factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
{\tt to\_mat} & Convert to WSMP CSR matrix format \\
{\tt reord\_mat}& Reordering and symbolic factorization \\
{\tt numfact} & Numerical factorization \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt WSMP\_BSPLINES} module}
\label{wsmpRoutines}
\end{table}
The simple Poisson example using the {\tt PARDISO\_BSPLINES} module
shown in the previous section can be easily adapted to the WSMP
interface since there are only two lines to change: the {\tt USE} and
the matrix {\tt TYPE} lines.
Other examples of how to use this interface can be found by running
``{\tt grep wsmp\_mat}'' on the F90 files the directory {\tt examples/}.
The same solver functionality can be found in both the PARDISO and
WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines}
and Table~\ref{wsmpRoutines} or the description of routines in
Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}.
However, there
is an important difference. While in PARDISO (and indeed also in LAPACK),
it is possible to define several matrices to solve simultaneously, it
appears that in WSMP, this is possible \emph{only} for symmetric and
hermitian matrices: in the present 10.9 version, the
routines to store and recall the solver context
{\tt WSTOREMAT/WRECALLMAT} which are present in the symmetric version of
the library are missing in the general version!
A separate module named {\tt PWSMP\_BSPLINES} added the MPI
\emph{parallelization} capability provided by WSMP. This parallel version
implements the same user interface as shown in Table~\ref{wsmpRoutines}.
The following considerations should be however taken in to account:
\begin{enumerate}
\item The coefficient matrix {\tt amat} is partitioned into blocks of
\emph{contiguous} rows, with their indices defined in
the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to
{\tt init}.
\item Calls to the routine {\tt updtmat} to update the matrix coefficients
should not specify a row index \emph{outside} this interval.
On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will
ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}]
is passed to them.
\item An \emph{optional} MPI communicator can be given to {\tt init}
using the keyword {\tt comm\_in}. By default, the communicator
{\tt MPI\_COMM\_WORLD} is used.
\end{enumerate}
A complete example using {\tt PWSMP\_BSPLINES} can be found in
{\tt examples/pde2d\_pwsmp.f90}.
\subsection{The MUMPS direct solver}
{\tt MUMPS}~\cite{MUMPS} is a \emph{parallel sparse direct solver}
using {\tt MPI} and is implemented in the module
{\tt MUMPS\_BSPLINES}. User program using this module
should be compiled and linked with {\tt MPI} even if only the
serial version of the solver is needed, in which case the
{\tt MPI\_COMM\_SELF} is passed to the initialization routine
{\tt init} as an \emph{optional} argument with the keyword
{\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be
passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it
is possible to use both serial and parallel solvers in the same
program to solve different matrix problems.
As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the
{\tt MUMPS} solver is used and summarized in
Table~\ref{mumpsRoutines}. The complete documentation
of these routines is given in Appendix~\ref{mumpsRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt clear\_mat}& Set the matrix elements to zero.\\
{\tt psum\_mat} & Parallel sum of matrices \\
{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
{\tt get\_count}& Get the number of non-zero elements in matrix \\
\hline
{\tt factor} & Factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
{\tt to\_mat} & Convert to WSMP CSR matrix format \\
{\tt reord\_mat}& Reordering and symbolic factorization \\
{\tt numfact} & Numerical factorization \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt MUMPS\_BSPLINES} module}
\label{mumpsRoutines}
\end{table}
\section{Fourier solver \cite{McMillan}}
\subsection{The matrix equation in Fourier space}
For a periodic one-dimensional problem, the solution $\phi_i$ and
the right-hand-side $\rho_i$ in (\ref{matEq1d}) are
$N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as
\begin{equation}
\begin{split}
\hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj}, &\qquad
\hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj}, \\
\phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj}, &\qquad
\rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}.
\end{split}
\end{equation}
Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation
in Fourier space:
\begin{equation}
\label{Fourier1d}
\sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k},
\end{equation}
where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the
notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh
with the interval $\Delta=L/N$, each of the DFT matrices of the
weak form can be written as
\begin{equation}
\begin{split}
\hat{A}_{kk'}
&= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj}
\sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\
&= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \,
\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
\sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
\Lambda_{j'}^{\alpha'} (x) \\
&= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
\sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
\Lambda_{j'}^{\alpha'} (x)
\end{split}
\end{equation}
Note that each of the last two sums is over the splines which are non-zero
at a given $x$. Using the translational symmetry of the periodic splines:
\begin{equation*}
\sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) =
\sum_j e^{i\frac{2\pi}{N}kj}
\Lambda_{j-J}^\alpha(x-J\Delta) =
e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta),
\end{equation*}
where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as
\begin{equation}
\hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj},
\end{equation}
which are computed by the routine {\tt ft\_basfun} in the module
{\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le
p$. The DFT matrices can now be written as:
\begin{equation*}
\hat{A}_{kk'} =
\frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\,
\left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}.
\end{equation*}
Finally, making the variable transform $x\rightarrow x+J\Delta$ and
defining the DFT of the weak-form coefficient $C$ as
\begin{equation}
\hat{C}_{k}(x) = \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk},
\end{equation}
the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an
integration over the first interval:
\begin{equation}
\hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\,
\hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x)
\left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*},
\end{equation}
which can be computed using again the same Gauss formula as before.
For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation
(\ref{Fourier1d}) reduces to a system of equations for the
uncoupled Fourier modes.
When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}.
However in problems where the solution is expected to be
``smooth'', one can keep only a small number of Fourier modes, reducing
thus the rank of $\hat A_{kk'}$. Furthermore, if the coefficients
$C(x)$ of the differential equations are very smooth, peaked at a few (low
order) modes, the DFT matrix can become \emph{sparse}!
The generalization to the two-dimensional problem (\ref{mat2d}) is
straightforward:
\begin{gather}
\hat{A}_{im,i'm'} =
\frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \,
\hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta)
\left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\}
\Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\
\hat{C}_{m}(r,\theta) =
\sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}.
\end{gather}
Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain
\begin{gather}
\hat{A}_{imn,i'm'n'} =
\frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta
\int_0^{\Delta\varphi}\!\!d\varphi \,
\hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta)
\left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi)
\left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\}
\Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\
\hat{C}_{mn}(r,\theta,\varphi) =
\sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+
k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}.
\end{gather}
Note that for axi-symmetric systems where the coefficients $C$ do not
depend on $\varphi$
\begin{equation}
\hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0}
\end{equation}
and thus the three-dimensional problem reduces to a set of independent
two-dimensional problems with
\begin{equation}
\begin{split}
\hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\
M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left|\hat{\Lambda}_n(\varphi)\right|^2.
\end{split}
\end{equation}
\subsection{Integral equation}
The DFT matrices for differential operators derived above can be
extended to an integral operator of the following form:
\begin{equation}
\int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x),
\end{equation}
where $\phi(x)$ is $L$-periodic. Using the same FE discretization as
above results in the following matrix in \emph{real} space:
\begin{equation}
A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\,
K(x,x')\,\Lambda_{j'}(x'),
\end{equation}
and its DFT
\begin{equation}
\hat{A}_{kk'} = \frac{1}{N}
\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\,
\sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\,
e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\,
e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*},
\end{equation}
Now, defining the DFT of the kernel as
\begin{equation}
\hat{K}_{kk'}(x,x') =
\sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\,
e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'},
\end{equation}
the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to
\begin{equation}
\hat{A}_{kk'} = \frac{1}{N}
\int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\,
\hat{\Lambda}_k (x)\,
\left[\hat{\Lambda}_{k'}(x')\right]^{*}.
\end{equation}
Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse}
if only a limited number of Fourier modes are retained in the DFT of
the kernel $\hat{K}$.
\subsection{A two-dimensional example with a non-uniform coefficient}
As a check, we considered here a two-dimensional example similar to the example in
section \ref{twodEx} but with a non-uniform coefficient:
\begin{equation}
-\frac{1}{r}\frac{\partial}{\partial r}
\left[rC\frac{\partial\phi}{\partial r}\right]
-\frac{1}{r^2}\frac{\partial}{\partial \theta}
\left[C\frac{\partial\phi}{\partial \theta}\right] = \rho.
\end{equation}
With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in
section (\ref{twodEx})
\begin{equation}
\phi(r,\theta) = (1-r^2)r^m\cos m\theta,
\end{equation}
the right-hand side becomes
\begin{equation}
\begin{split}
\rho(r,\theta) = 4(m+1)r^m\cos m\theta & +
\frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\
&+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta.
\end{split}
\end{equation}
This problem is solved in real space and Fourier space respectively in example
{\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}.
Both use the {\tt PARDISO\_BSPLINES} module to solve the sparse
matrix equation. It should be noted that the Fourier method should yield the
\emph{same solution} as found with the solver in real space if \emph{all} the
$N_\theta$ Fourier modes are kept.
For the problem defined above with {\tt m=3}, by keeping only the seven Fourier
modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the
Fourier solver, we found that both methods yield the same (up to 5
digits) \emph{relative discretization error}. Furthermore, increasing
the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$
Fourier components of the right hand side $\rho$ are not null) does
not increase the accuracy of the computed solution!
In this example, the matrix in Fourier space has a rank which is
$N_\theta/7$ times smaller than in the solver in real space. The number of
non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3
Fourier mode coupling terms are considered.
In general, the efficiency of such a \emph{matrix filter} is expected
to be problem-dependent. The Fourier solver should be tested in real simulations.
\section{The matrix construction module {\tt CONMAT\_MOD}}
\label{secCONMAT}
The module implements the generic matrix construction subroutine
{\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly},
for 1D and 2D differential equations. The computed matrix is returned in the
argument {\tt mat} which can be a Lapack band matrix as well as a
PARDISO, WSMP or MUMPS sparse matrix.
The complete interface of the subroutine is given below.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE conmat(spl, mat, coefeq, maxder)
TYPE(spline1d|spline2d) :: spl
TYPE([z]gbmat|[z]pbmat|[z]periodic_mat|[z]pardiso|...) :: mat
INTEGER, INTENT(in), OPTIONAL :: maxder[(2)]
INTERFACE
SUBROUTINE coefeq(x, [y], idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, [y]
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
END SUBROUTINE coefeq
END INTERFACE
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Construct the FE matrix for 1D or 2D differential operator.
\item[Arguments:] \mbox{}
\begin{verbatim}
spl : 1D or 2D spline
mat : matrix object
coefeq : user provided subroutine (see below)
maxder : Maximum order of the derivatives in the weak form.
Equal to 1 (first derivative) by default.
\end{verbatim}
\end{description}
The subroutine {\tt conmat} includes, in addition to the arguments
{\tt spl, mat} and {\tt maxder} described above,
an user provided subroutine as the third
argument {\tt coefeq} which computes all the weak
form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d})
for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output
array {\tt c} will contain all the computed $C$ with its corresponding
derivative orders $(d,d')$ returned in {\tt idt, idw} respectively.
Other quantities required to calculate the coefficients $C$ could be
communicated to {\tt coefeq}, using for example a {\tt COMMON} block or
a {\tt MODULE}.
An example of using this module can be found in section \ref{twodEx}.
\appendix
\section{Matrix assembly for differential operators}
\label{matAssembly}
\subsection{1D case}
\subsubsection{Local matrix}
The contribution to the discretized weak-form from the interval
$[x_i, x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices}
\begin{equation}
\label{locMat1d}
\begin{split}
A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x)
\Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\
&\simeq \sum_{g=1}^{G}\,
\underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}}
\,\underbrace{C(x_g)}_{c_g},
\end{split}
\end{equation}
where a $G$ point Gauss quadrature over the interval $[x_i, x_{i+1}]$
is used to approximate the integral and
$\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines
which are non zero in the interval $[x_i, x_{i+1}]$. For splines of
degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a
\emph{matrix-vector product}:
\begin{equation}
\mathbf{A}= \mathbf{F} \cdot \mathbf{c}.
\end{equation}
\subsubsection{Mapping to global matrix}
For $N$ intervals, the number of spline elements of degree $p$, is
$N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}.
Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added
to the \emph{global} matrix using the mapping:
\begin{equation}
\begin{split}
A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\
I = i+\alpha, & \qquad I' = i+\alpha'
\end{split}
\end{equation}
For periodic problems, the indices $I,I'$ are further transformed
by taking into account the periodicity $N$, using for example the
following {\tt FORTRAN} statement
\begin{center} \tt
I = MODULO(I-1,N) + 1 \\
\end{center}
\subsection{2D case}
\subsubsection{Local matrix}
In this case, the local matrix obtained for the grid cell
$[x_i, x_{i+1}]\times[y_j, y_{j+1}]$ takes the form:
\begin{equation}
\label{locMat2d}
\begin{split}
A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx
\int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y)
\,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\
&\simeq \sum_{g_1=1}^{G_1}\,
\underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}}
\;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}}
\underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}},
\end{split}
\end{equation}
which can be computed efficiently as \emph{matrix-matrix products}
\begin{equation}
\mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}}
\end{equation}
\subsubsection{Mapping to global matrix}
The local to global element indices mapping on each of the two
dimensions can be defined as previously as
\begin{equation}
\begin{split}
I = i+\alpha, & \qquad I' = i+\alpha' \\
J = j+\beta, & \qquad J' = j+\beta'
\end{split}
\end{equation}
If any of the 2 dimensions is periodic, the periodic condition have
to be applied to the corresponding global element index as explained
above.
Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$
to the standard 2 dimension matrix, we number first the elements in
$y$ coordinate and obtain the following index transformation:
\begin{equation}
\mu = J + N^y_e(I-1), \qquad \mu' = J' + N^y_e(I'-1),
\end{equation}
where $N^y_e$ is the number of elements along the $y$ coordinate. The
\emph{global} matrix is then constructed from
\begin{equation}
A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'}
\end{equation}
\section{The boundary conditions}
\subsection{Dirichlet condition}
\label{DirichletCond}
\subsubsection{1D case}
Let us consider the boundary condition
$u(0)=c$. Since all the splines are 0
at $x=0$, except for the first spline which is equal to 1,
$\Lambda_i(0)=\delta_{i,1}$, we have simply
\begin{equation}
c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow
\quad u_{1} = c.
\end{equation}
The discretized linear system of equations, taking into account of
this BC, can thus be written as
\begin{equation}
\begin{split}
u_1 &= c\\
\sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N
\end{split}
\end{equation}
or in the following matrix form:
\begin{equation}
\left(\begin{matrix}
1 & 0 & \cdots \\
0 & A_{22} & \cdots \\
\vdots & \vdots & \ddots \\
\end{matrix}\right)
\left(\begin{matrix}
u_{1} \\
u_{2} \\
\vdots\\
u_{N} \\
\end{matrix}\right) =
\left(\begin{matrix}
c \\
f_{2} -cA_{21}\\
\vdots\\
f_{N} -cA_{N1}\\
\end{matrix}\right)
\end{equation}
Note that (1) the transformed matrix preserves any symmetry or
positivity of the original matrix, (2) the first column of the
original matrix has to be saved in order to modify the RHS $f_i$
but only for non zero $c$ and (3) in that case, one needs to save
only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order.
In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be
summarized as follows:
\begin{enumerate}
\item Matrix transformation
\begin{enumerate}
\item Clear the matrix row $i=1$ and set its diagonal term
$A_{11}$ to 1.
\item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it.
\item Clear the matrix column $j=1$ and set its diagonal term
$A_{11}$ to 1.
\end{enumerate}
\item RHS transformation
\begin{enumerate}
\item Set $f_1\leftarrow c$.
\item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$.
\end{enumerate}
\end{enumerate}
If the original matrix \emph{is not symmetric}, only the steps (1a)
and (2a) are required, since the other steps are only necessary to
preserve the symmetry of the original matrix.
\subsubsection{2D case}
In that case, let us write the solution $u(x,y)$ as
\begin{equation}
u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y),
\end{equation}
where $N_1, N_2$ are the number of elements in each
dimension. Assuming the BC $u(0,y) = g(y)$, and since
$\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$
should satisfy
\begin{equation}
\label{dirich_2d}
\sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y).
\end{equation}
If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution
$u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}.
For non-uniform $g$,
at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$
satisfying the equation above:
\begin{enumerate}
\item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable}
set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an
\emph{interpolation} one (see section ``Spline
Interpolation'' in \cite{BSPLINES}).
\item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d})
defined as follows:
\begin{gather}
R = \left\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\|^2 =
\int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2
- 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\
\frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[
\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y)
-g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2,
\end{gather}
the boundary solutions $c_j$ can be calculated by solving the following
\emph{weak-form} of Eq.(\ref{dirich_2d}):
\begin{equation}
\sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) =
\int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2.
\end{equation}
\end{enumerate}
Once the values of $c_j$ known, the procedure described for the 1D case
above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$.
A full example for solving the cylindrical Laplace equation in
cylindrical coordinates:
\begin{equation}
\begin{split}
\frac{1}{r}\frac{\partial}{\partial r}
\left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2}
\frac{\partial^2\phi}{\partial \theta^2} = 0 \\
\phi(r=1,\theta) &= \cos m\theta.
\end{split}
\end{equation}
is given in {\tt bpslines/examples/dirichlet/poisson.f90}.
\subsection{Unicity on the axis}
\label{unicityCond}
Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$ , and
their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by
\begin{equation} \begin{array}{ccc}
u_1-u_N = \hat u_1 & & u_1 = \hat u_1 + \hat u_N \\
u_2-u_N = \hat u_2 & & u_2 = \hat u_2 + \hat u_N \\
\vdots & \Longrightarrow & \vdots \\
u_{N-1}-u_N = \hat u_{N-1} & & u_{N-1} = \hat u_{N-1} + \hat u_N \\
u_N = \hat u_N & & u_N = \hat u_N,
\end{array} \label{eq:unicity1} \end{equation}
the unicity condition can be specified by simply imposing
\begin{equation}
\hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
\end{equation}
From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined
as
\begin{equation}
\mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
\left(\begin{matrix}
1 & 0 & \dots & 0 & 1 \\
0 & 1 & \dots & 0 & 1 \\
& & \ddots& & \vdots \\
0 & 0 & \dots & 1 & 1 \\
0 & 0 & \dots & 0 & 1
\end{matrix}\right), \quad \mathbf{U^{T}} =
\left(\begin{matrix}
1 & 0 & \dots & 0 & 0 \\
0 & 1 & \dots & 0 & 0 \\
& & \ddots& & \vdots \\
0 & 0 & \dots & 1 & 0 \\
1 & 1 & \dots & 1 & 1
\end{matrix}\right).
\end{equation}
\paragraph{Matrix product \( \mathbf{A\cdot U}\)}
\begin{equation}
\mathbf{ A\cdot U} =
\left(\begin{array}{lllll}
A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & \sum_{j} A_{1,j} \\
A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & \sum_{j} A_{2,j} \\
& & \ddots& & \vdots \\
A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & \sum_{j}A_{N-1,j} \\
A_{N,1} & A_{N,2} & \dots & A_{N,N-1} & \sum_{j}A_{N,j}
\end{array}\right).
\end{equation}
Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the
\emph{the sum of each row on the last column}.
\paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)}
\begin{equation}
\mathbf{ U^T \cdot A} =
\left(\begin{array}{lllll}
A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & A_{1,N} \\
A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & A_{2,N} \\
& & \ddots& & \vdots \\
A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & A_{N-1,N} \\
\sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
\sum_{i}A_{i,N}
\end{array}\right).
\end{equation}
Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the
\emph{the sum of each column on the last row}.
\paragraph{Product \( \mathbf{\hat U \cdot b}\)}
\begin{equation}
\mathbf{\hat b} = \mathbf{U^T\cdot b} =
\left(\begin{array}{l}
b_1 \\
b_2 \\
\vdots \\
b_{N-1} \\
\sum_{i} b_{i}
\end{array}\right),
\end{equation}
\paragraph{Transformation of the original matrix equation}
The full original linear system, obtained from the discretization of the
2D \(r,\theta\) polar coordinates can be written as:
\begin{equation}
\left(\begin{array}{ll}
\mathbf{A} & \mathbf{B} \\
\mathbf{C} & \mathbf{D}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{u} \\
\mathbf{v}
\end{array}\right) =
\left(\begin{array}{l}
\mathbf{b} \\
\mathbf{c}
\end{array}\right), \label{eq:orig_matrix_eq}
\end{equation}
where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and
the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can
thus be written as
\begin{equation*}
\left(\begin{array}{ll}
\mathbf{U^T} & 0 \\
0 & \mathbf{I}
\end{array}\right)
\left(\begin{array}{ll}
\mathbf{A} & \mathbf{B} \\
\mathbf{C} & \mathbf{D}
\end{array}\right)
\left(\begin{array}{ll}
\mathbf{U} & 0 \\
0 & \mathbf{I}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{\hat u} \\
\mathbf{v}
\end{array}\right) =
\left(\begin{array}{ll}
\mathbf{U^T} &0 \\
0 & \mathbf{I}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{b} \\
\mathbf{c}
\end{array}\right),
\end{equation*}
\begin{equation}
\Longrightarrow
\left(\begin{array}{cc}
\mathbf{U^TAU} & \mathbf{U^TB} \\
\mathbf{CU} & \mathbf{D}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{\hat u} \\
\mathbf{v}
\end{array}\right) =
\left(\begin{array}{c}
\mathbf{U^Tb} \\
\mathbf{c}
\end{array}\right),
\end{equation}
Notice that the transformation preserves any symmetry existing in the original system
(\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
only the modified elements are shown and the sum is only over the first \(N\)
points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements.
\begin{equation}
\left(\begin{array}{lllllll}
\times & \times & \times & \times & \sum_{j} A_{1,j} & \times & \times \\
\times & \times & \times & \times & \sum_{j} A_{2,j} & \times & \times \\
\times & \times & \times & \times & \vdots & \times & \times \\
\times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
\sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
\sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\
\times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
\times & \times & \times & \times & \vdots & \times & \times
\end{array}\right)
\end{equation}
Only the \(N^{th}\) column and the \(N^{th}\) row are affected by the transformation.
Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
reads:
\begin{equation}
\left(\begin{array}{lllllll}
1 & 0 & \dots & 0 & 0 & 0 & 0 \\
0 & 1 & \dots & 0 & 0 & 0 & 0 \\
0 & 0 & \ddots & 0 & \vdots & 0 & 0 \\
0 & 0 & \dots & 1 & 0 & 0 & 0 \\
0 & 0 & \dots & 0 & \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\
0 & 0 & \dots & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
0 & 0 & \dots & 0 & \vdots & \times & \times
\end{array}\right)
\left(\begin{array}{l}
\hat u_1 \\
\hat u_2 \\
\vdots\\
\hat u_{N-1}\\
\hat u_{N} \\
u_{N+1} \\
\vdots
\end{array}\right) =
\left(\begin{array}{l}
0 \\
0 \\
\vdots\\
0 \\
\sum_{i} b_{i} \\
b_{N+1} \\
\vdots
\end{array}\right).
\end{equation}
\section{{\tt MATRIX} Reference}
\label{matRef}
The following conventions are adopted in the routine descriptions:
\begin{itemize}
\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
\begin{verbatim}
TYPE([z]gemat|[z]gbmat) :: mat
\end{verbatim}
declares that {\tt mat} can be either of type {\tt gemat}, {\tt
zgemat}, {\tt pbmat} or {\tt zpbmat}.
\item In a same declaration block, if a scalar or array of type {\tt
DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
which can be also complex, both should be either real
or complex. For example in the routine {\tt updtmat}, if {\tt mat}
of type {\tt zgbmat}, {\tt val} should be complex.
\end{itemize}
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
TYPE([z]gemat) :: mat
SUBROUTINE init(n, nterms, mat ,kmat)
TYPE([z]gbmat) :: mat
SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
TYPE([z]pbmat) :: mat
SUBROUTINE init(ku, n, nterms, mat, kmat)
TYPE([z]periodic_mat) :: mat
SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
INTEGER, INTENT(in) :: kl, ku, n, nterms
INTEGER, OPTIONAL :: kmat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize data structure for matrix
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
kl, ku : number of sub and super diagonals
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
\end{verbatim}
\end{description}
\subsection{destroy}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE destroy(mat)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Free matrix memory
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{updmat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE updtmat(mat, i, j, val)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Update (accumulate) element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
\end{verbatim}
\end{description}
\subsection{putele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putele(mat, i, j, val)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
\end{verbatim}
\end{description}
\subsection{putrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putrow(mat, i, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : input array
\end{verbatim}
\end{description}
\subsection{putcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putcol(mat, j, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
j : column index
arr : input array
\end{verbatim}
\end{description}
\subsection{getele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getele(mat, i, j, val)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : output value
\end{verbatim}
\end{description}
\subsection{getrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getrow(mat, i, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : output array
\end{verbatim}
\end{description}
\subsection{getcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getcol(mat, j, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix column
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : column index
arr : output array
\end{verbatim}
\end{description}
\subsection{vmx}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
FUNCTION vmx(mat, x)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
DOUBLE PRECISION|COMPLEX, DIMENSION(:), INTENT(in) :: x
DOUBLE PRECISION|COMPLEX, DIMENSION(SIZE(x)) :: vmx
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Matrix-vector product $Ax$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
x : input array
vmx : output array
\end{verbatim}
\end{description}
\subsection{mcopy}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE mcopy(mata, matb)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Matrix copy: $B = A$
\item[Arguments:] \mbox{}
\begin{verbatim}
mata : input matrix object
matb : output matrix object
\end{verbatim}
\end{description}
\subsection{maddto}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE maddto(mata, alpha, matb)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
DOUBLE PRECISION|COMPLEX : alpha
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Matrix addition: $A \leftarrow A+\alpha B$
\item[Arguments:] \mbox{}
\begin{verbatim}
mata : input matrix object
matb : output matrix object
alpha : input scalar
\end{verbatim}
\end{description}
\subsection{determinant}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE determinant(mat, base, pow)
TYPE([z]gemat|[z]gbmat|[z]pbmat) :: mat
INTEGER, INTENT(out) :: pow
DOUBLE PRECISION|COMPLEX : base
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : input matrix object
base : mantissa of determinant
pow : exponent of determinant
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
LU (Cholesky for symmetric/hermitian matrix) factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : inout matrix object
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve(mat)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
DOUBLE PRECISION|COMPLEX, DIMENSION [(:)] :: rhs
DOUBLE PRECISION|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Solve the linear system using the factored matrix, for a single or
multiple right-hand-side
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : input factored matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
\end{verbatim}
\end{description}
\section{{\tt SPMAT} Reference}
\label{spmatRef}
The following conventions are adopted in the routine descriptions:
\begin{itemize}
\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
\begin{verbatim}
TYPE([z]gemat|[z]gbmat) :: mat
\end{verbatim}
declares that {\tt mat} can be either of type {\tt gemat}, {\tt
zgemat}, {\tt pbmat} or {\tt zpbmat}.
\item In a same declaration block, if a scalar or array of type {\tt
DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
which can be also complex, both should be either real
or complex. For example in the routine {\tt updtmat}, if {\tt mat}
of type {\tt zgbmat}, {\tt val} should be complex.
\end{itemize}
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, mat, istart, iend)
TYPE([z]spmat) :: mat
INTEGER, INTENT(in), OPTIONAL :: istart, iend
INTEGER, INTENT(in) :: n
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize an empty sparse matrix of $n$ rows.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
mat : matrix object
istart, iend : range of row indices. By default istart=1, iend=n
\end{verbatim}
\end{description}
\subsection{destroy}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE destroy(mat)
TYPE([z]spmat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Free matrix memory
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{updmat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE updtmat(mat, i, j, val)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Update (accumulate) an existing element $A_{ij}$ or insert it in the
linked list in an increasing order in the column index j.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
\end{verbatim}
\end{description}
\subsection{putele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putele(mat, i, j, val, nlforce_zero)
TYPE([z]pbmat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite an existing element $A_{ij}$ or insert it in the
linked list in an increasing order in the column index j.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
nlforce_zero : Never remove an existing element when input is zero if TRUE
FALSE by default
\end{verbatim}
\end{description}
\subsection{putrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putrow(mat, i, arr, col, nlforce_zero)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
INTEGER, INTENT(in), OPTIONAL :: col(:)
LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : input array
col : input array containing column indices
nlforce_zero : Never remove an existing element when input is zero if TRUE
FALSE by default
\end{verbatim}
\end{description}
\subsection{putcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putcol(mat, j, arr, nlforce_zero)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
j : column index
arr : input array
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.FALSE. by default
\end{verbatim}
\end{description}
\subsection{getele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getele(mat, i, j, val)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : output value
\end{verbatim}
\end{description}
\subsection{getrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getrow(mat, i, arr, col)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
INTEGER, INTENT(out), OPTIONAL :: col(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix row and optionally the column indices
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : output array
col : output array containing column indices
\end{verbatim}
\end{description}
\subsection{getcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getcol(mat, j, arr)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix column
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : column index
arr : output array
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]spmat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\section{{\tt PARDISO\_BSPLINES} Reference}
\label{pardisoRef}
The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
getcol, vmx, mcopy, maddto} and {\tt destroy}
have \emph{exactly} the same list of arguments as
those from the {\tt MATRIX} module (as documented in
Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
that have different arguments. The same conventions as before are used
for the routine description.
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
& nlforce_zero)
INTEGER, INTENT(in) :: n, nterms
TYPE([z]pardiso_mat) :: mat
INTEGER, OPTIONAL, INTENT(in) :: kmat
LOGICAL, OPTIONAL, INTENT(in) :: nlsym
LOGICAL, OPTIONAL, INTENT(in) :: nlpos
LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
nlsym : symmetric or not. Default is .FALSE.
nlherm : Hermitian or not for complex matrix . Default is .FALSE.
nlpos : Positive-definite or not. Default is .TRUE.
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.TRUE. by default
\end{verbatim}
\end{description}
\subsection{clear\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE clear_mat(mat)
TYPE([z]pardiso_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Clear matrix, keeping its sparse structure unchanged
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{psum\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE sum_mat(mat, comm)
TYPE([z]pardiso_mat) :: mat
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Parallel sum of matrices. Result matrix is placed in the sparse
matrix mat\%mat on all processes of comm.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
comm : communicator
\end{verbatim}
\end{description}
\subsection{p2p\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
TYPE([z]pardiso_mat) :: mat
INTEGER, INTENT(in) :: dest
CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Point-to-point combine sparse matrix between 2 processes.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
dest : rank of remote process
extyp : exchange type ('send', 'recv', 'sendrecv')
op : operation type ('put', 'updt')
comm : communicator
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]pardiso_mat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat, nlreord, nlmetis, debug)
TYPE([z]pardiso_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlreord
LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlreord : call reord_mat if .TRUE. (default is .TRUE.)
nlmetis : use METIS nested dissection for reoredering. Default
is minimum degree alogorithm.
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
TYPE([z]pardiso_mat) :: mat
DOUBLE PRECISION|COMPLEX :: rhs(:)
DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
INTEGER, OPTIONAL :: nref
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
ref : maximum number of refinement steps. Default is 0 (no refinement).
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{to\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE to_mat(mat)
TYPE([z]pardiso_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Convert linked list spmat to pardiso matrix structure
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{reord\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE reord_mat(mat, nlmetis, debug)
TYPE([z]pardiso_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Reordering and symbolic factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlmetis : use METIS nested dissection for reoredering. Default
is minimum degree alogorithm.
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{numfact}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE numfact(mat, debug)
TYPE([z]pardiso_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Numerical factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\section{{\tt [P]WSMP\_BSPLINES} Reference}
\label{wsmpRef}
The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
getcol, vmx, mcopy, maddto} and {\tt destroy}
have \emph{exactly} the same list of arguments as
those from the {\tt MATRIX} module (as documented in
Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
that have different arguments. The same conventions as before are used
for the routine description.
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
& nlforce_zero, [comm_in])
INTEGER, INTENT(in) :: n, nterms
TYPE([z]wsmp_mat) :: mat
INTEGER, OPTIONAL, INTENT(in) :: kmat
LOGICAL, OPTIONAL, INTENT(in) :: nlsym
LOGICAL, OPTIONAL, INTENT(in) :: nlpos
LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
INTEGER, OPTIONAL, INTENT(in) :: comm_in
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
nlsym : symmetric or not. Default is .FALSE.
nlherm : Hermitian or not for complex matrix . Default is .FALSE.
nlpos : Positive-definite or not. Default is .TRUE.
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.TRUE. by default
comm_in : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES)
\end{verbatim}
\end{description}
\subsection{clear\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE clear_mat(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Clear matrix, keeping its sparse structure unchanged
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{psum\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE sum_mat(mat, comm)
TYPE([z]wsmp_mat) :: mat
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Parallel sum of matrices. Result matrix is placed in the sparse
matrix mat\%mat on all processes of comm.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
comm : communicator
\end{verbatim}
\end{description}
\subsection{p2p\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
TYPE([z]wsmp_mat) :: mat
INTEGER, INTENT(in) :: dest
CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Point-to-point combine sparse matrix between 2 processes.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
dest : rank of remote process
extyp : exchange type ('send', 'recv', 'sendrecv')
op : operation type ('put', 'updt')
comm : communicator
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]wsmp_mat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat, nlreord)
TYPE([z]wsmp_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlreord
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlreord : call reord_mat if .TRUE. (default is .TRUE.)
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
TYPE([z]wsmp_mat) :: mat
DOUBLE PRECISION|COMPLEX :: rhs(:)
DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
INTEGER, OPTIONAL :: nref
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
ref : maximum number of refinement steps. Default is 0 (no refinement).
debug : verbose output from WSMP if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{to\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE to_mat(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Convert linked list spmat to wsmp matrix structure
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{reord\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE reord_mat(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Reordering and symbolic factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{numfact}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE numfact(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Numerical factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\section{{\tt MUMPS\_BSPLINES} Reference}
\label{mumpsRef}
The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
getcol, vmx, mcopy, maddto} and {\tt destroy}
have \emph{exactly} the same list of arguments as
those from the {\tt MATRIX} module (as documented in
Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
that have different arguments. The same conventions as before are used
for the routine description.
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
& nlforce_zero, comm_in)
INTEGER, INTENT(in) :: n, nterms
TYPE([z]mumps_mat) :: mat
INTEGER, OPTIONAL, INTENT(in) :: kmat
LOGICAL, OPTIONAL, INTENT(in) :: nlsym
LOGICAL, OPTIONAL, INTENT(in) :: nlpos
LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
INTEGER, OPTIONAL, INTENT(in) :: comm_in
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
nlsym : symmetric or not. Default is .FALSE.
nlherm : Hermitian or not for complex matrix . Default is .FALSE.
nlpos : Positive-definite or not. Default is .TRUE.
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.TRUE. by default
comm_in : MPI communicator. By default MPI_COMM_SELF (serial mode).
\end{verbatim}
\end{description}
\subsection{clear\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE clear_mat(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Clear matrix, keeping its sparse structure unchanged
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{psum\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE sum_mat(mat, comm)
TYPE([z]mumps_mat) :: mat
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Parallel sum of matrices. Result matrix is placed in the sparse
matrix mat\%mat on all processes of comm.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
comm : communicator
\end{verbatim}
\end{description}
\subsection{p2p\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
TYPE([z]mumps_mat) :: mat
INTEGER, INTENT(in) :: dest
CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Point-to-point combine sparse matrix between 2 processes.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
dest : rank of remote process
extyp : exchange type ('send', 'recv', 'sendrecv')
op : operation type ('put', 'updt')
comm : communicator
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]mumps_mat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat, nlreord)
TYPE([z]mumps_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlreord
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlreord : call reord_mat if .TRUE. (default is .TRUE.)
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref)
TYPE([z]mumps_mat) :: mat
DOUBLE PRECISION|COMPLEX :: rhs(:)
DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
INTEGER, OPTIONAL :: nref
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
ref : maximum number of refinement steps. Default is 0 (no refinement).
debug : verbose output from MUMPS if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{to\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE to_mat(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Convert linked list spmat to mumps matrix structure
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{reord\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE reord_mat(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Reordering and symbolic factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{numfact}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE numfact(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Numerical factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\begin{thebibliography}{99}
\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
\bibitem{PARDISO} \url{http://www.pardiso-project.org/}
\bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html}
\bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}
\bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space
solution of linear partial integro-differential equations in
toroidal magnetic confinement geometries}, Computer Physics
Communications 181(4),
715-719 (2010)
\end{thebibliography}
\end{document}
diff --git a/docs/manual/using_bsplines.tex b/docs/manual/using_bsplines.tex
index fec665a..974735d 100644
--- a/docs/manual/using_bsplines.tex
+++ b/docs/manual/using_bsplines.tex
@@ -1,366 +1,366 @@
%
% @file using_bsplines.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
%\usepackage{verbatim}
%\usepackage[notref]{showkeys}
\title{\tt Using BSPLINES in Particle Codes}
\author{Trach-Minh Tran}
\date{v0.1, March 2012}
\abstract{These notes present some practical considerations on using
BSPLINES in particle codes, in particular for the charge or current
assignment as well as the field interpolation. Performance
measurements are done on an Intel Xeon X5570 and the more recent
Xeon E5-2680.}
\begin{document}
\maketitle
%\tableofcontents
\section{Introduction}
For simplicity, we assume in these notes that we are dealing with a
2D electrostatic particle code and the 2D Poisson equation is to be solved
using the Finite Element Method. Starting from the \emph{weak form}
and using the \emph{splines} for both \emph{basis} and \emph{test}
functions, the electrostatic field potential together with its
gradient and the right hand side can be computed from
\begin{equation}
\begin{split}
\phi(x,y) &=
\sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda_j(y) \\
\frac{\partial\phi}{\partial x} &=
\sum_{ij}\,c_{ij}\,\Lambda'_i(x)\Lambda_j(y) \\
\frac{\partial\phi}{\partial y} &=
\sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda'_j(y) \\
S_{ij} &= \sum_{\mu=1}^{N_p}\, q_\mu\Lambda_i(x_\mu)\Lambda_j(y_\mu),
\end{split}
\end{equation}
where $c_{ij}$ are the solutions of the discretized Poisson equation
and $\{x_\mu,y_\mu\}$ are the coordinates of the $N_p$ simulation
particles. At each time step, the calculation of both the field $\phi$
and its gradient (\emph{field interpolation}) for the particle
pusher and the construction of the RHS $S_{i}$ (\emph{charge
assignment}) involve thus the computation of a large number of
splines $\Lambda$ and its derivatives $\Lambda'$.
Notice that the construction of the solver matrix requires also
the calculations of the splines. This operation is however performed only
once at the initial timestep in the (most common) case where the
matrix is time independent and thus will not be considered in further
these notes.
\section{Computation of splines}
Let consider the grid defined by $x_i$, $i=1,\ldots,N+1$. Inside the
interval $[x_i, x_{i+1}]$, the $p+1$ non-zero splines of degree $p$
can be computed efficiently using its polynomial representation given
by
\begin{equation}
\begin{split}
\Lambda_{i+\alpha}(x) &= \sum_{k=0}^{p}\, V^{i}_{k\alpha}
(x-x_i)^k, \qquad \alpha=1,\ldots,p+1, \\
V^i_{k\alpha} &=
\left.\frac{1}{k!}\frac{d^k}{dx^k}\Lambda_{i+\alpha}(x)\right|_{x=x_i}.
\end{split}
\end{equation}
The $(p+1)^2N$ coefficients $V^i_{k\alpha}$ are precalculated and stored
during the spline initialization (in routine {\tt SET\_SPLINE}) by
using the \emph{recurrence relation} \cite{BSPLINES} to compute the spline and all its
$p$ derivatives. Note that for periodic splines on an equidistant
mesh, only $(p+1)^2$ coefficients $V_{k\alpha}$ are required since
the splines have \emph{translational invariance}.
For a polynomial $P(x)=a_0+a_1x+\ldots +a_px^p$, its value can be
calculated together with it first derivative, using Horner's rule as:
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
f = a(p)
fp = f
DO i=p-1,1,-1
f = a(i) + x*f
fp = f + x*fp
END DO
f = a(0) + x*f
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
showing that exactly $4p-2$ floating operations (flops) per point are
required. If only the value of the polynomial is needed, only $2p$ flops
per point are required.
\section{Field interpolation}
\subsection{1D case}
Let considered first the 1D case. The spline expansion of $\phi$ for
$x_i\le x < x_{i+1}$ are expressed as
\begin{equation}
\phi(x) = \sum_{\alpha=0}^pc_{i+\alpha}\Lambda_{i+\alpha}(x).
\end{equation}
To calculate the field using this spline expansion, $p+1$ splines
have to be first calculated followed by the sum above, which
yields a total cost of $2(p+1)^2\sim 2p^2$ flops per point. This cost
can be reduced by observing that
$\phi(x)$ is a \emph{piecewise polynomial} (PP) of degree $p$ in
each interval. Its PP coefficients can be obtained from
\begin{equation}
\begin{split}
\phi(x) &= \sum_{\alpha=0}^pc_{i+\alpha}\sum_{k=0}^{p}\, V^{i}_{k\alpha}
(x-x_i)^k \\
&= \sum_{k=0}^{p}\, \Pi^{i}_{k}(x-x_i)^k, \qquad
\Pi^{i}_{k}= \sum_{\alpha=0}^pc_{i+\alpha} V^{i}_{k\alpha}
\end{split}
\end{equation}
Once the $N(p+1)$ PP coefficients $\Pi^{i}_{k}$ have been calculated
from the spline expansion coefficients $c_{i+\alpha}$, only $2p$ flops per
point are required to obtain the field value, using the Horner's rule
described previously.
\subsection{2D case}
Extension for the spline expansion and the PP representation for
$\phi(x,y)$ is straightforwards and yields, for $x_i\le x < x_{i+1}$,
$y_j\le y < y_{j+1}$:
\begin{equation}
\begin{split}
\phi(x,y) &= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta}
\Lambda_{i+\alpha}(x)\Lambda_{j+\beta}(y) \\
\phi(x,y) &= \sum_{k=0}^{p1}\sum_{l=0}^{p2}\,\Pi^{ij}_{kl}(x-x_i)^k(y-y_j)^l, \qquad
\Pi^{ij}_{kl}= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} V^{i}_{k\alpha}V^{j}_{l\beta},
\end{split}
\end{equation}
where $ V^{i}_{k\alpha}$ and $V^{j}_{l\beta}$ are the PP
coefficients of the splines $\Lambda_{i+\alpha}(x)$ and
$\Lambda_{j+\beta}(y)$ respectively. Assuming the same spline order
$p$ in
both $x$ and $y$, the flop counts per point for the 2 representations are
respectively $2(3p+2)(p+1)\sim 6p^2$ and $2p(p+2)\sim 2p^2$, while the
storages required for the spline coefficients $c$ and the PP
coefficients $\Pi$ are $(N+p)^2\sim N^2$ and $N^2(p+1)^2$ respectively.
\subsection{Implementation in BSPLINES}
The PP representation is selected by default in BSPLINES,
\emph{unless} the logical keyword {\tt NLPPFORM} is set to
{\tt .FALSE.} when calling the spline initialization routine {\tt
SET\_SPLINE}. The flop counts per point for both methods are
summarized in the table below
\begin{center}
\begin{tabular}{|l|c|c|}
\hline
& 1D & 2D \\\hline
Spline expansion & $2(p+1)^{2}$ & $2(3p+2)(p+1)$ \\
PP representation & $2p$ & $2p(p+2)$ \\\hline
\end{tabular}
\end{center}
The routine {\tt GRIDVAL} computes the value of the
field or one of its derivatives. The first call to this routine
computes the PP coefficients $\Pi$ if {\tt NLPPFORM=.TRUE.} is
selected or just store the spline coefficients $c$ in the spline
internal data otherwise. In the following calls to {\tt GRIDVAL}, $c$
should not be passed to the routines.
Notice that the PP representation requires to store the $N^2(p+1)^2$
PP coefficients in the 2D case, which is still acceptable. In the 3D
case, this storage requirement becomes $N^3(p+1)^3$ which can be
prohibitive! In this case the less efficient \emph{Spline expansion}
formulation should be selected.
In the \emph{particle loop}, the routine {\tt GETGRAD} which computes
the function and all its first partial derivatives at once should be
called instead of {\tt GRIDVAL}.
\section{Particle localization({\tt locintv})}
In both charge assignment and field interpolation, finding in which
interval of the spatial grid the particle is localized should be first
performed. This operation is trivial for the case of an equidistant
mesh. For non-equidistant mesh, an \emph{equidistant fine} mesh and its mapping to the
actual mesh are first constructed in the spline initialization routine
{\tt SET\_SPLINE} and used to localize the particles in the routine
{\tt LOCINTV}.
\section{Performances}
From the considerations above, using BSPLINES to perform the charge
assignment and field interpolation in 2D and 3D particle codes might
result in large overheads because of the large number of calls to the
routines {\tt BASFUN} to compute the splines or {\tt GETGRAD} to perform the field
interpolation at a \emph{single} particle position. In the following,
the performances the 2D linearized gyrokinetic code GYGLES which has
been adapted to use BSPLINES are analyzed. Vectorization by grouping
the particles for both charge assignment and field interpolation is then
proposed as a way to speed up these two operations when using
BSPLINES.
\subsection{Scalar performances}
Optimization of the scalar versions of {\tt BASFUN} and {\tt GETGRAD}
(when these routines are called with a \emph{single} particle) is
performed essentially by
\begin{itemize}
\item Minimizing the flop counts and reducing redundant operations.
\item Unrolling small loops, for example the loop over the $p+1$
splines that are non-zero at a given position, for small $p$.
\item Define all routines called by {\tt BASFUN} and {\tt GETGRAD} as
\emph{internal procedures}.
\item Rearranging the memory layout of the multi-dimension array
containing the PP coefficients of the spline.
\end{itemize}
The timings of the charge and current assignment (assign), the particle
pusher (push) and the main time loop for a 5 time step run of GYGLES,
on an Intel Xeon X5570 (hpcff.fz-juelich.de), using 4 MPI
processes and Intel Fortran 12.1.2 are summarized in the following
table
\begin{center}
\begin{tabular}{lrrrrr}
\hline
& $T_0$(s) & $T_1$(s) & $T_2$(s) & $T_1/T_0$ & $T_2/T_1$ \\
\hline
assign & 1.454E+01 & 2.126E+01 & 2.259E+01 & 1.46 & 1.06 \\
push & 2.536E+01 & 3.080E+01 & 3.144E+01 & 1.21 & 1.02 \\
mainloop & 4.197E+01 & 5.955E+01 & 6.149E+01 & 1.42 & 1.03 \\
\hline
\end{tabular}
\end{center}
where $T_0$ is the time in seconds obtained with the original code
while $T_1$ and $T_2$ are the times obtained with BSPLINES,
respectively using an \emph{equidistant} and \emph{non-equidistant}
radial mesh. In all the 3 runs, a quadratic splines were used.
The small difference between \emph{equidistant} and
\emph{non-equidistant} mesh comes mainly from the particle localization.
The same run on an Intel Xeon E5-2680 (helios.iferc-csc.org), using
the same Intel compiler (with AVX instructions) yields
\begin{center}
\begin{tabular}{lrrrrr}
\hline
& $T_0$(s) & $T_1$(s) & $T_2$(s) & $T_1/T_0$ & $T_2/T_1$ \\
\hline
assign & 1.093E+01 & 1.987E+01 & 2.086E+01 & 1.82 & 1.05 \\
push & 2.385E+01 & 2.868E+01 & 2.994E+01 & 1.20 & 1.04 \\
mainloop & 3.656E+01 & 5.411E+01 & 5.598E+01 & 1.48 & 1.03 \\
\hline
\end{tabular}
\end{center}
\subsection{Speed up by vectorization}
As found in the last section, using external routines from BSPLINES
instead of \emph{hard coding} the spline computations
results in a slowing down of 40--50\% for the main time
loop. As will shown later, this problem could be solved by \emph{grouping} the
particles and using the vectorized {\tt BASFUN} and {\tt GETGRAD}
routines. Such particle grouping can be done for example, by replacing the usual
particle loop by the following Fortran code fragment
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
nset = npart/ngroup
IF(MODULO(npt, ngroup).NE.0) nset = nset+1
i2 = 0
DO is=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npart)
CALL basfun(x(i1:i2), ...)
END DO
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
where {\tt npart} particles are partitioned into {\tt nset} groups,
each containing at most {\tt ngroup} particles. Vectorization of the
routines {\tt BASFUN} and {\tt GETGRAD} is achieved by moving whenever
is possible the loop over the {\tt ngroup} particles into the
innermost loop.
The vectorization performances shown in Fig~.\ref{fig:basfun_hpcff} and
Fig~.\ref{fig:getgrad_hpcff}, respectively for {\tt BASFUN} and {\tt
GETGRAD} are obtained using version $12.1.2$ of Intel compiler on
an Intel Xeon X5570 (hpcff.fz-juelich.de). With a speedup of at least
2 for quadratic splines, the slowing down found previously in the
scalar version could be likely compensated. The new AVX instructions
present in the recent Intel Xeon E5-2680 (helios.iferc-csc.org) seems
to improve somewhat the vectorization performance as shown in
Fig~.\ref{fig:basfun_helios} and Fig~.\ref{fig:getgrad_helios}.
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{basfun_perf_hpcff}
\caption{In this test, $10^5$ particles are distributed randomly on
an equidistant mesh of 64 intervals. On each point, all the $p+1$
splines are computed. The particle localization routine {\tt
locintv} is included in the timing. In order to have a good
statistics in the measurements, $1'000$ iterations of the particle loop are considered.}
\label{fig:basfun_hpcff}
\end{figure}
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{getgrad_perf_hpcff}
\caption{In this test, $10^5$ particles are distributed randomly on
an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where
the coordinate $y$ is periodic. On each point, the function
together with its gradient are computed, using the PP
representation. The particle localization routine {\tt
locintv} is included in the timing. In order to have a good
statistics in the measurements, $100$ iterations of the particle loop are considered.}
\label{fig:getgrad_hpcff}
\end{figure}
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{basfun_perf_helios}
\caption{In this test, $10^5$ particles are distributed randomly on
an equidistant mesh of 64 intervals. On each point, all the $p+1$
splines are computed. The particle localization routine {\tt
locintv} is included in the timing. In order to have a good
statistics in the measurements, $1'000$ iterations of the particle loop are considered.}
\label{fig:basfun_helios}
\end{figure}
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{getgrad_perf_helios}
\caption{In this test, $10^5$ particles are distributed randomly on
an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where
the coordinate $y$ is periodic. On each point, the function
together with its gradient are computed, using the PP
representation. The particle localization routine {\tt
locintv} is included in the timing. In order to have a good
statistics in the measurements, $100$ iterations of the particle loop are considered.}
\label{fig:getgrad_helios}
\end{figure}
\begin{thebibliography}{99}
\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
\end{thebibliography}
\end{document}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 4169dde..4b28ada 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,77 +1,77 @@
#
# @file CMakeLists.txt
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
# @author Trach-Minh Tran
#
project(bsplines_tests)
add_library(local_util STATIC
pde1dp_mod.f90
pde3d_mod.f90
ppde3d_mod.f90
ppde3d_pb_mod.f90
tcdsmat_mod.f90
meshdist.f90
dismat.f90
ibcmat.f90
disrhs.f90
)
target_link_libraries(local_util PUBLIC bsplines)
set(BS_TESTS
driv1 driv2 driv3 driv4
pde1d pde1dp pde1dp_cmpl
pde2d pde2d_pb
pde1dp_cmpl_dft
pde3d ppde3d ppde3d_pb
fit1d fit1dbc fit1dp
fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y
moments optim1 optim2 optim3
tcdsmat tmassmat tbasfun tsparse1
basfun_perf getgrad_perf gridval_perf
test_kron
)
if(HAS_PARDISO)
set(BS_TESTS ${BS_TESTS}
pde1dp_cmpl_pardiso
pde2d_pardiso
pde2d_sym_pardiso
pde2d_sym_pardiso_dft
tsparse2
)
endif()
if(HAS_MUMPS)
set(BS_TESTS ${BS_TESTS}
pde2d_mumps
pde1dp_cmpl_mumps
)
endif()
foreach(test ${BS_TESTS})
add_executable(${test} ${test}.f90)
target_link_libraries(${test} local_util ${LIBS} ${EXTRA_LIBS})
endforeach()
diff --git a/examples/Makefile b/examples/Makefile
index 552d03f..0e45321 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,506 +1,506 @@
#
# @file Makefile
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Emmanuel Lanti
# @author Trach-Minh Tran
#
PREFIX=$(HOME)
XGRAFIX=/usr/local/xgrafix_1.2/src-double
# FUTILS=/usr/local/crpp/futils
# BSPLINES=/usr/local/crpp/bsplines
# PPUTILS2=../pputils2
# PPPACK=../pppack
# SLATEC=/usr/local/slatec
# FFTW=/usr/local/fftw-2.1.5-opt
ARPACK=/usr/local/ARPACK
LAPACK95=$(MKL)/../../../mkl/include/intel64/lp64
MPIF90 = mpif90
F90 = ifort
LD = $(MPIF90)
debug = -g -traceback -check bounds -warn alignments -warn unused
optim = -O3 -xHOST
OPT=$(debug)
#OPT=$(optim)
F90FLAGS = $(OPT) -fPIC -I../fft -I$(BSPLINES)/include -I$(FUTILS)/include \
-I$(FFTW)/include -I$(MKL)/../../include/intel64/lp64
LDFLAGS = $(OPT) -fPIC -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib -L$(FFTW)/lib64 \
-L$(SLATEC)/lib -L$(ARPACK)
CC = cc
CFLAGS = -O2
LIBS = -mkl=cluster -lbsplines -lpppack -lpppack -lpputils2 -lfutils -lfft -larpack \
-lfftw -lhdf5_fortran -lhdf5 -lz -lpthread
LIBS1 = -mkl=cluster -lbsplines1 -lpppack -lfutils \
-lhdf5_fortran -lhdf5 -lz -lsz -lpthread
ifdef WSMP
LDFLAGS += -L$(WSMP)
LIBS += -lwsmp64
LIBS1 += -lpwsmp64
endif
ifdef MUMPS
F90FLAGS += -I$(MUMPS)/include -I$(LAPACK95)
LDFLAGS += -L$(MUMPS)/lib -L$(PARMETIS)/lib
LIBS += $(MUMPSLIBS)
endif
ifdef PETSC_DIR
include ${PETSC_DIR}/conf/variables
F90FLAGS += -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include \
-I$(MKL)/../../include
LIBS += ${PETSC_FORTRAN_LIB} ${PETSC_KSP_LIB}
endif
PDE1DOBJS = pde1d.o
PDE2DOBJS = pde2d.o dismat.o disrhs.o ibcmat.o
FIT1DOBJJS = fit1d.o
.SUFFIXES:
.SUFFIXES: .o .c .f90 .f
.f90.o:
$(MPIF90) $(F90FLAGS) -c $<
.f.o:
$(F90) $(F90FLAGS) -c $<
all: examples tmat
EX_FILES = driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde3d ppde3d ppde3d_pb \
pde2d pde2d_pb fit1d fit1dbc \
fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y \
moments optim1 optim2 optim3 tmassmat tbasfun tcdsmat tsparse1 tsparse2 \
pde2d_pardiso pde2d_sym_pardiso pde1dp_cmpl_pardiso pde1dp_cmpl_dft \
pde2d_sym_pardiso_dft \
pde1d_eig_csr pde1d_eig_gb pde1d_eig_ge
ifdef WSMP
EX_FILES += pde2d_wsmp pde2d_pwsmp pde2d_sym_wsmp pde1dp_cmpl_wsmp
endif
ifdef MUMPS
EX_FILES += pde2d_mumps pde1dp_cmpl_mumps pde1d_eig_zmumps
endif
ifdef PETSC_DIR
EX_FILES += pde2d_petsc
endif
examples: $(EX_FILES)
tmat: tmatrix_gb tmatrix_pb tmatrix_zpb
adv: adv.o extra.o
$(LD) $(LDFLAGS) -L$(XGRAFIX) -o $@ $< extra.o $(LIBS) \
-lfftw -lXGF -lXGC -lX11
cp -p $@ ../bin/
driv1: driv1.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
driv2: driv2.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
driv3: driv3.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
driv4: driv4.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde1d: $(PDE1DOBJS)
$(LD) $(LDFLAGS) -o $@ $(PDE1DOBJS) $(LIBS)
cp -p $@ ../bin/
pde1d_eig_csr: pde1d_eig_csr.o
$(LD) $(LDFLAGS) -o $@ pde1d_eig_csr.o $(LIBS) -lpputils2
cp -p $@ ../bin/
pde1d_eig_zcsr: pde1d_eig_zcsr.o
$(LD) $(LDFLAGS) -o $@ pde1d_eig_zcsr.o $(LIBS)
cp -p $@ ../bin/
pde1d_eig_zmumps: pde1d_eig_zmumps.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS) -lpputils2
cp -p $@ ../bin/
pde1d_eig_zcsr: pde1d_eig_zcsr.o
$(LD) $(LDFLAGS) -o $@ pde1d_eig_zcsr.o $(LIBS)
cp -p $@ ../bin/
pde1d_eig_gb: pde1d_eig_gb.o
$(LD) $(LDFLAGS) -o $@ pde1d_eig_gb.o $(LIBS)
cp -p $@ ../bin/
pde1d_eig_ge: pde1d_eig_ge.o
$(LD) $(LDFLAGS) -o $@ pde1d_eig_ge.o $(LIBS)
cp -p $@ ../bin/
pde1dp: pde1dp.o pde1dp_mod.o
$(LD) $(LDFLAGS) -o $@ pde1dp.o pde1dp_mod.o $(LIBS)
cp -p $@ ../bin/
pde1dp_cmpl: pde1dp_cmpl.o
$(LD) $(LDFLAGS) -o $@ pde1dp_cmpl.o $(LIBS)
cp -p $@ ../bin/
pde1dp_KA: pde1dp_KA.o pde1dp_mod_KA.o
$(LD) $(LDFLAGS) -o $@ pde1dp_KA.o pde1dp_mod_KA.o $(LIBS)
cp -p $@ ../bin/
pde2d: $(PDE2DOBJS)
$(LD) $(LDFLAGS) -o $@ $(PDE2DOBJS) $(LIBS)
cp -p $@ ../bin/
pde3d: pde3d.o pde3d_mod.o
$(LD) $(LDFLAGS) -o $@ pde3d.o pde3d_mod.o $(LIBS)
cp -p $@ ../bin/
ppde3d: ppde3d.o ppde3d_mod.o
$(LD) $(LDFLAGS) -o $@ ppde3d.o ppde3d_mod.o $(LIBS)
cp -p $@ ../bin/
ppde3d_pb: ppde3d_pb.o ppde3d_pb_mod.o
$(LD) $(LDFLAGS) -o $@ ppde3d_pb.o ppde3d_pb_mod.o $(LIBS)
cp -p $@ ../bin/
pde2d_pb: pde2d_pb.o
$(LD) $(LDFLAGS) -o $@ pde2d_pb.o $(LIBS)
cp -p $@ ../bin/
pde2d_nh: pde2d_nh.o
$(LD) $(LDFLAGS) -o $@ pde2d_nh.o $(LIBS)
cp -p $@ ../bin/
tcdsmat: tcdsmat.o tcdsmat_mod.o meshdist.o
$(LD) $(LDFLAGS) -o $@ tcdsmat.o tcdsmat_mod.o meshdist.o $(LIBS)
cp -p $@ ../bin/
tmatrix_pb: tmatrix_pb.o
$(LD) $(LDFLAGS) -o $@ tmatrix_pb.o $(LIBS)
cp -p $@ ../bin/
tmatrix_zpb: tmatrix_zpb.o
$(LD) $(LDFLAGS) -o $@ tmatrix_zpb.o $(LIBS)
cp -p $@ ../bin/
tmatrix_gb: tmatrix_gb.o
$(LD) $(LDFLAGS) -o $@ tmatrix_gb.o $(LIBS)
cp -p $@ ../bin/
fit1d: $(FIT1DOBJJS)
$(LD) $(LDFLAGS) -o $@ $(FIT1DOBJJS) $(LIBS)
cp -p $@ ../bin/
fit1d_cmpl: fit1d_cmpl.o
$(LD) $(LDFLAGS) -o $@ fit1d_cmpl.o $(LIBS)
cp -p $@ ../bin/
gyro: gyro.o
$(LD) $(LDFLAGS) -o $@ gyro.o -lslatec $(LIBS)
cp -p $@ ../bin/
fit1dbc: fit1dbc.o
$(LD) $(LDFLAGS) -o $@ fit1dbc.o $(LIBS)
cp -p $@ ../bin/
fit1dp: fit1dp.o
$(LD) $(LDFLAGS) -o $@ fit1dp.o $(LIBS)
cp -p $@ ../bin/
fit2d: fit2d.o meshdist.o
$(LD) $(LDFLAGS) -o $@ fit2d.o meshdist.o $(LIBS)
cp -p $@ ../bin/
fit2d1d: fit2d1d.o meshdist.o
$(LD) $(LDFLAGS) -o $@ fit2d1d.o meshdist.o $(LIBS)
cp -p $@ ../bin/
fit2d_cmpl: fit2d_cmpl.o meshdist.o
$(LD) $(LDFLAGS) -o $@ fit2d_cmpl.o meshdist.o $(LIBS)
cp -p $@ ../bin/
fit2dbc: fit2dbc.o meshdist.o
$(LD) $(LDFLAGS) -o $@ fit2dbc.o meshdist.o $(LIBS)
cp -p $@ ../bin/
fit2dbc_x: fit2dbc_x.o meshdist.o
$(LD) $(LDFLAGS) -o $@ fit2dbc_x.o meshdist.o $(LIBS)
cp -p $@ ../bin/
fit2dbc_y: fit2dbc_y.o meshdist.o
$(LD) $(LDFLAGS) -o $@ fit2dbc_y.o meshdist.o $(LIBS)
cp -p $@ ../bin/
moments: moments.o
$(LD) $(LDFLAGS) -o $@ moments.o $(LIBS)
cp -p $@ ../bin/
mesh: mesh.o meshdist.o
$(LD) $(LDFLAGS) -o $@ mesh.o meshdist.o $(LIBS)
cp -p $@ ../bin/
optim1: optim1.o
$(LD) $(LDFLAGS) -o $@ optim1.o $(LIBS)
cp -p $@ ../bin/
optim2: optim2.o
$(LD) $(LDFLAGS) -o $@ optim2.o $(LIBS)
cp -p $@ ../bin/
optim3: optim3.o
$(LD) $(LDFLAGS) -o $@ optim3.o $(LIBS)
cp -p $@ ../bin/
tmassmat: tmassmat.o
$(LD) $(LDFLAGS) -o $@ tmassmat.o $(LIBS)
cp -p $@ ../bin/
tbasfun: tbasfun.o
$(LD) $(LDFLAGS) -o $@ tbasfun.o $(LIBS)
cp -p $@ ../bin/
basfun_perf: basfun_perf.o
$(LD) $(LDFLAGS) -o $@ basfun_perf.o $(LIBS)
cp -p $@ ../bin/
gridval_perf: gridval_perf.o
$(LD) $(LDFLAGS) -o $@ gridval_perf.o $(LIBS)
cp -p $@ ../bin/
getgrad_perf: getgrad_perf.o
$(LD) $(LDFLAGS) -o $@ getgrad_perf.o $(LIBS)
cp -p $@ ../bin/
basfun_perf1: basfun_perf1.o
$(LD) $(LDFLAGS) -o $@ basfun_perf1.o $(LIBS)
cp -p $@ ../bin/
tlocintv: tlocintv.o meshdist.o
$(LD) $(LDFLAGS) -o $@ tlocintv.o meshdist.o $(LIBS)
cp -p $@ ../bin/
tgausleg: tgausleg.o meshdist.o
$(LD) $(LDFLAGS) -o $@ tgausleg.o meshdist.o $(LIBS)
cp -p $@ ../bin/
poisson: poisson.o
$(LD) $(LDFLAGS) -o $@ poisson.o $(LIBS)
cp -p $@ ../bin/
poisson_mumps: poisson_mumps.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
poisson_petsc: poisson_petsc.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
tsparse1: tsparse1.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
tsparse2: tsparse2.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_pardiso: pde2d_pardiso.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_mumps: pde2d_mumps.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_petsc: pde2d_petsc.o $(PPUTILS2)/pputils2.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_sym_pardiso: pde2d_sym_pardiso.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_sym_pardiso_dft: pde2d_sym_pardiso_dft.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_sym_wsmp_dft: pde2d_sym_wsmp_dft.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
zssmp_ex1: zssmp_ex1.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
zpardiso_ex1: zpardiso_ex1.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_sym_wsmp: pde2d_sym_wsmp.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_wsmp: pde2d_wsmp.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde2d_pwsmp: pde2d_pwsmp.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS1)
cp -p $@ ../bin/
pde1dp_cmpl_pardiso: pde1dp_cmpl_pardiso.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde1dp_cmpl_mumps: pde1dp_cmpl_mumps.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde1dp_cmpl_dft: pde1dp_cmpl_dft.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
pde1dp_cmpl_wsmp: pde1dp_cmpl_wsmp.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
tspline: tspline.o
$(LD) $(LDFLAGS) -o $@ tspline.o $(LIBS)
cp -p $@ ../bin/
tpsum_mat: tpsum_mat.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
tp2p_mat: tp2p_mat.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
cp -p $@ ../bin/
test_pwsmp: test_pwsmp.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS1)
cp -p $@ ../bin/
driv1.o:
driv2.o:
driv3.o:
driv4.o:
pde1d.o:
pde1dp.o: pde1dp_mod.o
pde1dp_cmpl.o:
pde1dp_mod.o:
pde1dp_KA.o: pde1dp_mod_KA.o
pde1dp_mod_KA.o:
pde2d.o:
pde3d.o: pde3d_mod.o
pde3d_mod.o:
ppde3d.o: ppde3d_mod.o
ppde3d_mod.o:
ppde3d_pb.o: ppde3d_pb_mod.o
ppde3d_pb_mod.o:
pde2d_pb.o:
pde2d_nh.o:
tcdsmat.o: tcdsmat_mod.o
tcdsmat_mod.o:
fit1d.o:
fit1d_cmpl.o:
gyro.o:
fit1dbc.o:
fit1dp.o:
fit2d.o:
fit2d_cmpl.o:
fit2dbc.o:
dismat.o:
disrhs.o:
ibcmat.o:
adv.o:
tmatrix_pb.o:
tmatrix_zpb.o:
tmatrix_gb.o:
moments.o:
mesh.o:
optim1.o:
optim2.o:
optim3.o:
tmassmat.o:
tbasfun.o:
basfun_perf.o:
basfun_perf1.o:
tlocintv.o:
tgausleg.o:
poisson.o:
poisson_mumps.o:
tsparse1.o:
tsparse2.o:
pde2d_pardiso.o:
pde2d_mumps.o:
pde2d_petsc.o:
pde2d_sym_pardiso.o:
pde2d_sym_pardiso_dft.o:
pde2d_sym_wsmp_dft.o:
pde2d_wsmp.o:
pde2d_pwsmp.o:
pde2d_sym_wsmp.o:
pde1dp_cmpl_pardiso.o:
pde1dp_cmpl_mumps.o:
pde1dp_cmpl_dft.o:
pde1dp_cmpl_wsmp.o:
tpsum_mat.o:
tp2p_mat.o:
poisson_petsc.o:
tags:
etags *.f *.f90 ../src/*.f90 $(PPPACK)/*.f90
clean:
rm -f *.o *.mod *~ a.out
distclean: clean
# $(MAKE) -C ../src distclean
# $(MAKE) -C ../fft distclean
# $(MAKE) -C $(PPUTILS2) distclean
rm -f *.a *.mod pde1d pde1dp pde1dp_cmpl pde1dp_KA driv1 driv2 driv3 driv4 \
tmatrix_pb tmatrix_gb tmatrix_zpb \
pde2d pde2d_pb pde2d_nh pde3d ppde3d ppde3d_pb\
fit1d fit1d_cmpl gyro fit1dbc fit1dp \
fit2d fit2d1d fit2d_cmpl fit2dbc \
fit2dbc_x fit2dbc_y adv moments tcdsmat poisson poisson_mumps\
mesh optim1 optim2 optim3 tmassmat tbasfun \
basfun_perf gridval_perf getgrad_perf tlocintv \
tsparse1 tsparse2 \
pde2d_pardiso pde2d_sym_pardiso pde2d_wsmp pde2d_sym_wsmp \
pde1dp_cmpl_dft pde1dp_cmpl_wsmp pde1d_eig_csr pde1d_eig_pb pde1d_eig_ge \
pde2d_sym_pardiso_dft pde1dp_cmpl_pardiso \
pde2d_mumps pde1dp_cmpl_mumps tpsum_mat tp2p_mat pde2d_sym_wsmp_dft \
poisson_petsc pde2d_petsc \
pde1d_eig_csr pde1d_eig_zcsr pde1d_eig_zmumps pde1d_eig_gb pde1d_eig.ge \
../bin/*
diff --git a/examples/adv.f90 b/examples/adv.f90
index 8c40a29..c3fc591 100644
--- a/examples/adv.f90
+++ b/examples/adv.f90
@@ -1,320 +1,320 @@
!>
!> @file adv.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! 1d Periodic Advection: F(x,t) = F(x-u*dt,t-dt)
! using module bsplines
!
USE bsplines
!
IMPLICIT NONE
INCLUDE 'fftw_f77.h'
TYPE(spline1d) :: spl
INTEGER, PARAMETER :: nhistmx=1000, ncomb=4
INTEGER :: nx, nidbas, dim
INTEGER :: nstep, nskipt, nhist, mhist
DOUBLE PRECISION :: a, b, dt, u, w, coefx(5)
DOUBLE PRECISION, DIMENSION(0:nhistmx) :: thist, tmass, tfmin, tfmax, ermass
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), xshft(:)
DOUBLE PRECISION, ALLOCATABLE :: coefs(:), ferr(:), kx(:), ampl(:)
DOUBLE COMPLEX, ALLOCATABLE :: cfgrid(:), ffft(:)
INTEGER(8) :: forw
DOUBLE PRECISION :: time
INTEGER :: i
NAMELIST /newrun/ nx, nidbas, a, b, dt, u, w, coefx
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 100 ! Number of intevals in x
a = 0.0 ! Left boundary of interval
b = 100.0 ! Right boundary of interval
dt = 0.1 ! Time step
u = 1.0 ! Velocity
w = 2.0 ! Shape of initial function
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!===========================================================================
! 2.0 Define initial conditions
!
! Set up mesh
!
ALLOCATE(xgrid(0:nx), xshft(0:nx), fgrid(0:nx), ferr(0:nx))
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
!
! Set up the spline interpolation
!
CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
CALL get_dim(spl, dim)
WRITE(*,'(a,i6)') 'dimension of splines', dim
ALLOCATE(coefs(dim))
!
! Initial conditions
!
time = 0.0d0
nstep = 0
nskipt = 1
DO i=0,nx-1
fgrid(i) = finit(xgrid(i))
END DO
fgrid(nx) = fgrid(0)
ferr = 0.0
CALL get_splcoef(spl, fgrid, coefs)
WRITE(*,'(a/(10f8.3))') 'knots', spl%knots
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
WRITE(*,'(a/(10f8.3))') 'coefs', coefs
!
! Set up FFT
!
ALLOCATE(kx(-nx/2+1:nx/2), ampl(-nx/2+1:nx/2))
ALLOCATE(cfgrid(0:nx-1), ffft(0:nx-1))
DO i=-nx/2+1,nx/2
kx(i) = i
END DO
cfgrid(0:nx-1) = fgrid(0:nx-1)
CALL fftw_f77_create_plan(forw, nx, FFTW_FORWARD, FFTW_ESTIMATE)
CALL fftw_f77_one(forw, cfgrid, ffft)
ampl(0:nx/2) = ABS(ffft(0:nx/2))
ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1))
!
! Set up history arrays
!
nhist = 0
thist(nhist) = 0.0
!!$ tmass(nhist) = SUM(coefs(1:nx))
tmass(nhist) = SUM(fgrid(1:nx))
tfmin(nhist) = MINVAL(fgrid)
tfmax(nhist) = MAXVAL(fgrid)
ermass(nhist) = tmass(nhist)-tmass(0)
WRITE(*,'(a,(10f8.3))') 'Initial mass', tmass(nhist)
!
! Initialize Xgrafix
!
CALL xginit(3,'ADV','adv',' ',' ',' ',' ',time)
!
CALL xgset2d('linlin', 'X', 'F', 'open', 220, 60, 1.d0, 1.d0, &
& .FALSE., .FALSE., xgrid(0), xgrid(nx), -0.2d0, 1.2d0)
CALL xgcurve(xgrid, fgrid, nx+1, 1)
!
CALL xgset2d('linlin', 'X', 'FERR', 'open', 620, 60, 1.d0, 1.d0, &
& .FALSE., .FALSE., xgrid(0), xgrid(nx), -1.d0, 1.d0)
CALL xgcurve(xgrid, ferr, nx+1, 1)
!
CALL xgset2d('linlin', 'Time', 'Error Mass', 'open', 820, 400, 1.d0, 1.d0, &
& .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0)
CALL xgcurve(thist, ermass, nhist, 1)
!
CALL xgset2d('linlin', 'Time', 'Min/Max', 'open', 420, 400, 1.d0, 1.d0, &
& .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0)
CALL xgcurve(thist, tfmin, nhist, 1)
CALL xgcurve(thist, tfmax, nhist, 2)
!
CALL xgset2d('linlin', 'kx', 'Amplitude of F', 'open', 20, 400, 1.d0, 1.d0, &
& .FALSE., .FALSE., kx(-nx/2+1), kx(nx/2), &
& 0.0d0, MAXVAL(ampl))
CALL xgcurve(kx, ampl, nx, 1)
!
CALL xgupdate
!===========================================================================
! 3.0 Time loop
!
nskipt = 1
DO
nstep = nstep+1
time = time+dt
CALL xgevent
!
! Shift x
!
CALL get_splcoef(spl, fgrid, coefs)
xshft(0:nx) = xgrid(0:nx) - u*dt
CALL gridval(spl, xshft, fgrid, 0, coefs)
!
xshft(0:nx) = xgrid(0:nx) - u*time
DO i =0,nx
ferr(i) = fgrid(i) - finit(xshft(i))
END DO
!
cfgrid(0:nx-1) = fgrid(0:nx-1)
CALL fftw_f77_one(forw, cfgrid, ffft)
ampl(0:nx/2) = ABS(ffft(0:nx/2))
ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1))
!
! Diagnostics
!
IF( MOD(nstep,nskipt) .EQ. 0 ) THEN
nhist = nhist+1
IF( nhist .GT. nhistmx ) THEN
nskipt = ncomb*nskipt
mhist = nhist-1
CALL packarr(mhist, thist, ncomb, nhist)
CALL packarr(mhist, tmass, ncomb, nhist)
CALL packarr(mhist, tfmin, ncomb, nhist)
CALL packarr(mhist, tfmax, ncomb, nhist)
CALL packarr(mhist, ermass, ncomb, nhist)
END IF
thist(nhist) = time
tmass(nhist) = SUM(fgrid(1:nx))
!!$ tmass(nhist) = SUM(coefs(1:nx))
tfmin(nhist) = MINVAL(fgrid)
tfmax(nhist) = MAXVAL(fgrid)
ermass(nhist) = (tmass(nhist)-tmass(0))/tmass(0)
END IF
!
CALL xgupdate
END DO
!===========================================================================
! 9.0 Prologue
!
CALL fftw_f77_destroy_plan(forw)
DEALLOCATE(xgrid, fgrid, xshft, coefs)
CONTAINS
DOUBLE PRECISION FUNCTION finit(xx)
!
! A "box" function
!
DOUBLE PRECISION, INTENT(in) :: xx
DOUBLE PRECISION :: xl, xr, xl0, xr0, h, x, xlen
INTEGER :: kl, kr, klflag, krflag
!
xlen = b-a
x = a + MODULO(xx-a+xlen, xlen)
!
xl = 0.375*(b-a)
xr = 0.624*(b-a)
CALL interv(xgrid, nx+1, xl, kl, klflag)
CALL interv(xgrid, nx+1, xr, kr, krflag)
xl0 = xl + w*(xgrid(kl)-xgrid(kl-1))
xr0 = xr - w*(xgrid(kr)-xgrid(kr-1))
CALL interv(xgrid, nx+1, xl0, kl, klflag)
CALL interv(xgrid, nx+1, xr0, kr, krflag)
IF( x .LT. xl0 ) THEN
h = xgrid(kl)-xgrid(kl-1)
finit = EXP(-((x-xl0)/(w*h))**2)
ELSE IF( x .GT. xr0) THEN
h = xgrid(kr)-xgrid(kr-1)
finit = EXP(-((x-xr0)/(w*h))**2)
ELSE
finit = 1.0d0
END IF
END FUNCTION finit
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE packarr(n, arr, skip, nhist)
IMPLICIT NONE
INTEGER :: n, skip, i, ii, nhist
DOUBLE PRECISION :: arr(0:n)
ii = 0
DO i=0,n,skip
arr(ii) = arr(i)
ii=ii+1
END DO
nhist = ii
END SUBROUTINE packarr
!+++
SUBROUTINE dump(filename, l)
!
! Is invoked when button "Dump" is pressed.
!
IMPLICIT NONE
CHARACTER(len=*) :: filename
INTEGER :: l
WRITE(*,'(a,a,a1)') 'Dumpfile = "', filename(1:l),'"'
END SUBROUTINE dump
SUBROUTINE quit()
!
! Is invoked when button "Quit" is pressed
!
IMPLICIT NONE
PRINT*, 'Program terminated ...'
END SUBROUTINE quit
diff --git a/examples/basfun_perf.f90 b/examples/basfun_perf.f90
index 7cab71f..69c54d3 100644
--- a/examples/basfun_perf.f90
+++ b/examples/basfun_perf.f90
@@ -1,170 +1,170 @@
!>
!> @file basfun_perf.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Performance of scalar and vector versions of def_basfun
!
USE bsplines
IMPLICIT NONE
INTEGER :: nx, nidbas, nrank, npt=10, jdermx
DOUBLE PRECISION :: dx
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :)
INTEGER :: left, i, i1, i2
INTEGER :: ngroup, nset, nremain
TYPE(spline1d) :: splx
DOUBLE PRECISION :: t0, t1, seconds
DOUBLE PRECISION :: t_loop, t_locintv1, t_basfun1, t_locintv, t_basfun
INTEGER :: its, nits
INTEGER, ALLOCATABLE :: vleft(:)
DOUBLE PRECISION, ALLOCATABLE :: vfun(:,:,:)
LOGICAL :: nlperiod
!
NAMELIST /newrun/ nx, nidbas, npt, nits, ngroup, jdermx, nlperiod
!
!===============================================================================
!
! 1D grid
!
nx = 10
nidbas = 3
npt = 1000000
nits = 100
ngroup = 10
jdermx = 0
nlperiod = .FALSE.
READ(*,newrun)
WRITE(*,newrun)
ALLOCATE(xgrid(0:nx))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Set up spline
!
CALL set_spline(nidbas, 4, xgrid, splx, period=nlperiod)
nrank = splx%dim
WRITE(*,'(a, i5)') 'nrank =', nrank
WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
!
ALLOCATE(xpt(npt))
ALLOCATE(fun(0:nidbas,0:jdermx)) ! Values and first derivatives of all Splines
CALL RANDOM_NUMBER(xpt)
!===============================================================================
! 1.0 Scalar version
!
! loop
t0 = seconds()
DO its=1,nits
DO i=1,npt
END DO
END DO
t_loop = (seconds()-t0)/REAL(nits*npt,8)
!
! locintv
t0 = seconds()
DO its=1,nits
DO i=1,npt
CALL locintv(splx, xpt(i), left)
END DO
END DO
t_locintv1 = (seconds()-t0)/REAL(nits*npt,8)
!
! def_basfun
t0 = seconds()
DO its=1,nits
DO i=1,npt
CALL locintv(splx, xpt(i), left)
CALL basfun(xpt(i), splx, fun, left+1)
END DO
END DO
t_basfun1 = (seconds()-t0)/REAL(nits*npt,8)
!
WRITE(*,'(6x,3a12)') 'loop', 'locintv', 'basfun'
WRITE(*,'(6x,8(1pe12.3))') t_loop, t_locintv1, t_basfun1
!===============================================================================
! 2.0 Vector version
!
ngroup = 1
DO WHILE (ngroup .LT. npt/2)
ALLOCATE(vleft(ngroup))
ALLOCATE(vfun(0:nidbas, 0:jdermx, ngroup))
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
!
! loop
t0 = seconds()
DO its=1,nits
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
END DO
END DO
t_loop = (seconds()-t0)/REAL(nits*nset,8)
!
! locintv
t0 = seconds()
DO its=1,nits
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
CALL locintv(splx, xpt(i1:i2), vleft)
END DO
END DO
t_locintv = (seconds()-t0)/REAL(nits*npt,8)
!
! basfun
t0 = seconds()
DO its=1,nits
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
CALL locintv(splx, xpt(i1:i2), vleft)
CALL basfun(xpt(i1:i2), splx, vfun, vleft+1)
END DO
END DO
t_basfun = (seconds()-t0)/REAL(nits*npt,8)
!
WRITE(*,'(i6,8(1pe12.3))') ngroup, t_loop, t_locintv, t_basfun, &
& t_locintv1/t_locintv, t_basfun1/t_basfun
DEALLOCATE(vleft)
DEALLOCATE(vfun)
ngroup = ngroup*2
END DO
!===============================================================================
!
! Clean up
!
CALL destroy_sp(splx)
DEALLOCATE(xgrid)
DEALLOCATE(xpt)
DEALLOCATE(fun)
END PROGRAM main
diff --git a/examples/dirichlet/Makefile b/examples/dirichlet/Makefile
index 4f0edc9..0b6276f 100644
--- a/examples/dirichlet/Makefile
+++ b/examples/dirichlet/Makefile
@@ -1,62 +1,62 @@
#
# @file Makefile
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Emmanuel Lanti
# @author Trach-Minh Tran
#
#BSPLINES = $(HOME)/bsplines
#FUTILS = $(HOME)/futils
F90 = mpif90
LD = $(F90)
debug = -g -traceback -check bounds -warn alignments -warn nounused
optim = -O3 -xHOST
F90FLAGS = $(OPT) -I$(BSPLINES)/include -I$(FUTILS)/include
LDFLAGS = $(OPT) -L$(BSPLINES)/lib -L$(FUTILS)/lib -L$(HDF5)/lib
LIBS = -lbsplines -lpppack -lfutils -lhdf5_fortran -lhdf5 -lz
LDFLAGS += -g -L$(MKL)
LIBS += -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread
OPT=$(debug)
#OPT=$(optim)
.SUFFIXES:
.SUFFIXES: .o .c .f90
.f90.o:
$(F90) $(F90FLAGS) -c $<
all: poisson
poisson: poisson.o poisson_mod.o
$(LD) $(LDFLAGS) -o $@ $^ $(LIBS)
poisson.o: poisson_mod.o
clean:
rm -f *.o *.mod
distclean: clean
rm -f poisson a.out *~ *.h5 *.fig *.eps *.pdf
diff --git a/examples/dirichlet/poisson.f90 b/examples/dirichlet/poisson.f90
index fa96f4a..f6048e7 100644
--- a/examples/dirichlet/poisson.f90
+++ b/examples/dirichlet/poisson.f90
@@ -1,383 +1,383 @@
!>
!> @file poisson.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 2d Poisson in cylibdrical coordinates, using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 0, with f(x=1,y) = cos(my)
! exact solution: f(x,y) = r^m cos(my)
!
USE bsplines
USE matrix
USE conmat_mod
USE poisson_mod
USE futils
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, dirmeth, nterms
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
TYPE(spline2d) :: splxy
TYPE(pbmat) :: mat
!
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2, shifty
DOUBLE PRECISION :: err00, err10, err01
INTEGER :: nits=500
!
CHARACTER(len=128) :: file='poisson.h5'
INTEGER :: fid
!
! Dirichlet BC properties encapsulated in a derived datatype
!
TYPE(dirich) :: right_bc
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, dirmeth, &
& coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
dirmeth = 1 ! 1: use spline interpolation in Dirichlet BC
! 2: residual minimization in Dirichlet BC
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
!
CALL init(ku, nrank, nterms, mat)
t0 = seconds()
CALL conmat(splxy, mat, coefeq)
tmat = seconds() - t0
ALLOCATE(arr(nrank))
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! Store some usefull parameters in right_bc
!
right_bc%meth = dirmeth
right_bc%mbess = mbess
right_bc%n1 = nx
right_bc%n2 = ny
right_bc%nidbas1 = nidbas(1)
right_bc%nidbas2 = nidbas(2)
!
! BC on Matrix and RHS
!
CALL ibcmat(mat, right_bc)
CALL ibcrhs(rhs, ygrid, right_bc)
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, rhs, sol)
!
! Backtransform of solution
!
sol(1:ny-1) = sol(ny)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
!
tsolv = seconds() - t0
gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
!===========================================================================
! 4.0 Check the solution
!
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = f_exact(mbess, xgrid(i), ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM at first call to gridval
IF(nlppform) THEN
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
END IF
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
!
errsol = solana - solcal
WRITE(*,'(a/(8(1pe12.3)))') 'Error at the boundary r = 1', errsol(nx,:)
err00 = err2_norm(splxy, jder, mbess, f_exact)
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = 0.0d0
ELSE
solana(i,j) = fx_exact(mbess,xgrid(i),ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
!
errsol = solana - solcal
err10 = err2_norm(splxy, jder, mbess, fx_exact)
!
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = 0.0d0
ELSE
solana(i,j) = fy_exact(mbess, xgrid(i), ygrid(j))
END IF
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
!
errsol = solana - solcal
err01 = err2_norm(splxy, jder, mbess, fy_exact)
!
WRITE(*,'(/a,3(1pe12.3))') 'Discretization errors', err00, err10, err01
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
CALL closef(fid)
!
!===========================================================================
!
CONTAINS
!--
DOUBLE PRECISION FUNCTION f_exact(m,x,y)
INTEGER,INTENT(in) :: m
DOUBLE PRECISION, INTENT(in) :: x, y
f_exact = (x**m)*COS(m*y)
END FUNCTION f_exact
!--
DOUBLE PRECISION FUNCTION fx_exact(m,x,y)
INTEGER,INTENT(in) :: m
DOUBLE PRECISION, INTENT(in) :: x, y
fx_exact = m*(x**(m-1))*COS(m*y)
END FUNCTION fx_exact
!--
DOUBLE PRECISION FUNCTION fy_exact(m,x,y)
INTEGER,INTENT(in) :: m
DOUBLE PRECISION, INTENT(in) :: x, y
fy_exact = -m*(x**m)*SIN(m*y)
END FUNCTION fy_exact
!--
SUBROUTINE prntmat(str, a)
DOUBLE PRECISION, DIMENSION(:,:) :: a
CHARACTER(len=*) :: str
INTEGER :: i
WRITE(*,'(a)') TRIM(str)
DO i=1,SIZE(a,1)
WRITE(*,'(10f8.1)') a(i,:)
END DO
END SUBROUTINE prntmat
!--
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
!--
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
!--
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/dirichlet/poisson.m b/examples/dirichlet/poisson.m
index 3a56ac8..bb5c2e3 100644
--- a/examples/dirichlet/poisson.m
+++ b/examples/dirichlet/poisson.m
@@ -1,88 +1,88 @@
%
% @file poisson.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='poisson.h5';
m=3;
%
% Get data from data sets
%
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
solr=hdf5read(file,'/derivx')';
solt=hdf5read(file,'/derivy')';
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
solx = cos(T).*solr - sin(T)./R.*solt;
soly = sin(T).*solr + cos(T)./R.*solt;
figure
subplot(221)
pcolor(double(r),double(t),double(sol));
shading interp
hold on, quiver(r,t,solr,solt)
xlabel('r'); ylabel('\theta')
title('R-THETA plane')
colorbar
subplot(222)
pcolor(double(x),double(y),double(sol))
shading interp
hold on, quiver(x,y,solx,soly)
hold off, axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(double(x),double(y),double(sol))
xlabel('X'); ylabel('Y');
title('Solutions')
subplot(224)
surfc(double(x),double(y),double(err))
xlabel('X'); ylabel('Y');
title('Errors')
figure
subplot(211)
plot(r,sol(1,:),'o',r,solexact(1,:))
xlabel('r')
ylabel('Solutions at \theta=0')
grid on
subplot(212)
tt=0:0.01:2*pi;
plot(t,sol(:,end),'o',tt,cos(m.*tt))
xlabel('\theta')
ylabel('Solutions at r=1')
grid on
diff --git a/examples/dirichlet/poisson_mod.f90 b/examples/dirichlet/poisson_mod.f90
index ae234c5..96e9417 100644
--- a/examples/dirichlet/poisson_mod.f90
+++ b/examples/dirichlet/poisson_mod.f90
@@ -1,354 +1,354 @@
!>
!> @file poisson_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE poisson_mod
IMPLICIT NONE
!
! Dirichlet BC encapsulated in a derived datatype
!
TYPE dirich
INTEGER :: meth, mbess, n1, n2, nidbas1, nidbas2
INTEGER :: i0, i1
DOUBLE PRECISION, POINTER :: amat(:,:) => NULL()
DOUBLE PRECISION, POINTER :: g(:) => NULL()
END TYPE dirich
!
CONTAINS
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
USE bsplines
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
!
! The RHS is 0
!
rhs(:) = 0.0d0
END SUBROUTINE disrhs
!
SUBROUTINE ibcmat(mat, bc)
!
! Apply BC on matrix
!
USE matrix
TYPE(pbmat), INTENT(inout) :: mat
TYPE(dirich) :: bc
INTEGER :: ny
INTEGER :: kl, ku, nrank, i, j, k
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
INTEGER :: i0, i1, ii
!===========================================================================
! 1.0 Prologue
!
ku = mat%ku
kl = ku
nrank = mat%rank
ny = bc%n2
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
ALLOCATE(zsum(nrank))
ALLOCATE(arr(nrank))
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
!
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
CALL putrow(mat, ny, zsum)
DEALLOCATE(zsum)
!
! The away operator
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
DEALLOCATE(arr)
!===========================================================================
! 3.0 Dirichlet on right boundary
!
!!$ i0 = nrank - ku
!!$ i1 = nrank - ny
i0 = (bc%n1-1)*bc%n2 + 1
i1 = nrank - bc%n2
bc%i0 = i0
bc%i1 = i1
!
IF(ASSOCIATED(bc%amat)) DEALLOCATE(bc%amat)
IF(ASSOCIATED(bc%g)) DEALLOCATE(bc%g)
ALLOCATE(bc%amat(i0:i1,ny))
ALLOCATE(bc%g(ny))
!
WRITE(*,'(/a,2i6)') 'IBCMAT: i0, i1 =', i0, i1
!
! Extract and save the last ny columns of matrix
!
ALLOCATE(arr(nrank))
DO k=1,ny
j = nrank-ny+k
CALL getcol(mat, j, arr)
bc%amat(i0:i1,k) = arr(i0:i1)
IF( ANY(arr(1:i0-1) .NE. 0.0d0) ) THEN
WRITE(*,'(a,i4)') 'i0 is underestimated for j =', j
END IF
END DO
!
! The away operator
!
DO k=1,ny
j = nrank-ny+k
arr = 0.0d0; arr(j) = 1.0d0
CALL putrow(mat, j, arr)
END DO
!
DEALLOCATE(arr)
!
END SUBROUTINE ibcmat
!+++
SUBROUTINE ibcrhs(rhs, ygrid, bc)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
DOUBLE PRECISION, INTENT(in) :: ygrid(:)
TYPE(dirich) :: bc
!
INTEGER :: nrank, ny, m, i0, i1
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
ny = bc%n2
m = bc%mbess
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
! Get spline coefs at boundary r=1
!
SELECT CASE (bc%meth)
CASE(1)
CALL dirich_interp(ygrid, bc, frhs)
CASE(2)
CALL dirich_minres(ygrid, bc, frhs)
END SELECT
!
! Modify RHS
!
i0 = bc%i0
i1 = bc%i1
rhs(i0:i1) = rhs(i0:i1) - MATMUL(bc%amat, bc%g)
rhs(i1+1:nrank) = bc%g(1:ny)
CONTAINS
DOUBLE PRECISION FUNCTION frhs(x)
DOUBLE PRECISION, INTENT(in) :: x
frhs = COS(m*x)
END FUNCTION frhs
END SUBROUTINE ibcrhs
!++++
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
!++++
SUBROUTINE dirich_interp(ygrid, bc, frhs)
!
! Dirichlet BC by interpolation
!
USE bsplines
DOUBLE PRECISION, INTENT(in) :: ygrid(:)
TYPE(dirich) :: bc
INTERFACE
DOUBLE PRECISION FUNCTION frhs(x)
DOUBLE PRECISION, INTENT(in) :: x
END FUNCTION frhs
END INTERFACE
!
INTEGER :: nidbas, dim, n2, i
DOUBLE PRECISION :: shifty
DOUBLE PRECISION :: gval(SIZE(ygrid))
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
DOUBLE PRECISION :: ygrid_interp(SIZE(ygrid))
TYPE(spline1d) :: spl_interp
!
nidbas = bc%nidbas2
n2 = bc%n2
!
IF(MODULO(nidbas,2) .EQ. 0 ) THEN
shifty = 0.5d0*(ygrid(2)-ygrid(1))
ygrid_interp(:) = ygrid(:) + shifty
ELSE
ygrid_interp(:) = ygrid(:)
END IF
CALL set_splcoef(nidbas, ygrid_interp, spl_interp, period=.TRUE.)
CALL get_dim(spl_interp, dim)
ALLOCATE(coefs(dim))
!
DO i=1,SIZE(ygrid)
gval(i) = frhs(ygrid_interp(i))
END DO
CALL get_splcoef(spl_interp, gval, coefs)
!
! Store spline coefs in bc
!
bc%g(1:n2) = coefs(1:n2)
!
DEALLOCATE(coefs)
CALL destroy_sp(spl_interp)
END SUBROUTINE dirich_interp
!++++
SUBROUTINE dirich_minres(xgrid, bc, frhs)
!
! Dirichlet BC by minimization of residual
!
USE bsplines
USE matrix
USE conmat_mod
DOUBLE PRECISION, INTENT(in) :: xgrid(:)
TYPE(dirich) :: bc
INTERFACE
DOUBLE PRECISION FUNCTION frhs(x)
DOUBLE PRECISION, INTENT(in) :: x
END FUNCTION frhs
END INTERFACE
!
INTEGER :: nx, nidbas, ngauss, kl, ku
TYPE(periodic_mat) :: mass_mat
TYPE(spline1d) :: spl
!
nidbas = bc%nidbas2
ngauss = nidbas+1
nx = bc%n2
kl = nidbas
ku = kl
!
CALL set_spline(nidbas, ngauss, xgrid, spl, period=.TRUE.)
CALL init(kl, ku, nx, 1, mass_mat)
CALL conmat(spl, mass_mat, coefeq_mass)
CALL conrhs(spl, bc%g, frhs)
CALL factor(mass_mat)
CALL bsolve(mass_mat, bc%g)
!
CALL destroy(mass_mat)
CALL destroy_sp(spl)
!
CONTAINS
SUBROUTINE coefeq_mass(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
c(1) = 1.0d0
idt(1) = 0
idw(1) = 0
END SUBROUTINE coefeq_mass
END SUBROUTINE dirich_minres
!++++
DOUBLE PRECISION FUNCTION err2_norm(spl, jder, mbess, fexact)
!
! Compute error L2 norm unsing Gauss points
!
USE bsplines
TYPE(spline2d) :: spl
INTEGER, INTENT(in) :: jder(:)
INTEGER, INTENT(in) :: mbess
INTERFACE
DOUBLE PRECISION FUNCTION fexact(m,x,y)
INTEGER, INTENT(in) :: m
DOUBLE PRECISION, INTENT(in) :: x, y
END FUNCTION fexact
END INTERFACE
!
DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
INTEGER :: i1, ig1, n1, nidbas1, ndim1, ng1
INTEGER :: i2, ig2, n2, nidbas2, ndim2, ng2
DOUBLE PRECISION :: contrib
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), sol(:,:)
!
! Gauss points and weights on all intervals
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
xg1 => spl%sp1%gausx ! xg1(ng1,n1)
wg1 => spl%sp1%gausw ! wg1(ng1,n1)
ng1 = SIZE(xg1,1)
xg2 => spl%sp2%gausx
wg2 => spl%sp2%gausw
ng2 = SIZE(xg2,1)
!
err2_norm = 0.0d0
ALLOCATE(x(ng1), y(ng2))
ALLOCATE(sol(ng1,ng2))
DO i1=1,n1
x=xg1(:,i1)
DO i2=1,n2
y=xg2(:,i2)
CALL gridval(spl, x, y, sol, jder)
DO ig1=1,ng1
DO ig2=1,ng2
contrib = wg1(ig1,i1)*wg2(ig2,i2)*(sol(ig1,ig2) - &
& fexact(mbess,x(ig1),y(ig2)))**2
err2_norm = err2_norm + x(ig1)*contrib !use same inner-product in weak-form
END DO
END DO
END DO
END DO
DEALLOCATE(x)
DEALLOCATE(y)
DEALLOCATE(sol)
err2_norm = SQRT(err2_norm)
END FUNCTION err2_norm
END MODULE poisson_mod
diff --git a/examples/dirichlet/run_poisson.sh b/examples/dirichlet/run_poisson.sh
index dafc434..7833785 100644
--- a/examples/dirichlet/run_poisson.sh
+++ b/examples/dirichlet/run_poisson.sh
@@ -1,46 +1,46 @@
#
# @file run_poisson.sh
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
#!/bin/sh
EXEC=./poisson
cat > in0 < in1
$EXEC < in1 | grep 'Discretization errors '
done
rm -f in?
diff --git a/examples/dismat.f90 b/examples/dismat.f90
index a15e8df..c4bf0ed 100644
--- a/examples/dismat.f90
+++ b/examples/dismat.f90
@@ -1,157 +1,157 @@
!>
!> @file dismat.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline2d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
diff --git a/examples/disrhs.f90 b/examples/disrhs.f90
index 24cbb84..cf29fd5 100644
--- a/examples/disrhs.f90
+++ b/examples/disrhs.f90
@@ -1,198 +1,198 @@
!>
!> @file disrhs.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs3(mbess, npow, spl, rhs)
!
! Assembly the RHS using 3d spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: mbess, npow
TYPE(spline2d1d), TARGET :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
!
TYPE(spline1d), POINTER :: sp1, sp2, sp3
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: n3, nidbas3, ndim3, ng3
INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
sp1 => spl%sp12%sp1
sp2 => spl%sp12%sp2
sp3 => spl%sp3
!
CALL get_dim(sp1, ndim1, n1, nidbas1)
CALL get_dim(sp2, ndim2, n2, nidbas2)
CALL get_dim(sp3, ndim3, n3, nidbas3)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(sp1, ng1)
CALL get_gauss(sp2, ng2)
CALL get_gauss(sp3, ng3)
WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
ALLOCATE(xg3(ng3), wg3(ng3))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs,1)
rhs(1:nrank,1:n3) = 0.0d0
!
DO i=1,n1
CALL get_gauss(sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), sp1, fun1, i)
DO j=1,n2
CALL get_gauss(sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), sp2, fun2, j)
DO k=1,n3
CALL get_gauss(sp3, ng3, k, xg3, wg3)
DO ig3=1,ng3
CALL basfun(xg3(ig3), sp3, fun3, k)
contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
& rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
DO k3=0,nidbas3
kk = MODULO(k+k3-1,n3) + 1
rhs(ij,kk) = rhs(ij, kk) + &
& contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(xg3, wg3, fun3)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
INTEGER, INTENT(in) :: m, n
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
END FUNCTION rhseq
END SUBROUTINE disrhs3
diff --git a/examples/driv1.f90 b/examples/driv1.f90
index 03a9329..e725b38 100644
--- a/examples/driv1.f90
+++ b/examples/driv1.f90
@@ -1,189 +1,189 @@
!>
!> @file driv1.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Basis splines on a 2d grid.
!
USE bsplines
USE futils
IMPLICIT NONE
TYPE(spline1d) :: spx, spy
INTEGER :: nx=10, ny=8, nidbas=2, ngauss=4, npts=1000
DOUBLE PRECISION :: a, b, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:), fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xp(:), funxp(:,:), yp(:), funyp(:,:)
DOUBLE PRECISION :: dx, dy
INTEGER :: i, j, left
CHARACTER(len=256) :: title
INTEGER :: fid
NAMELIST /newrun/ nx, ny, nidbas, ngauss, a, b, coefx, coefy
!===========================================================================
nidbas = 3
ngauss = 4
nx = 10 ! Number of intevals in x
ny = 8 ! Number of intevals in y
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
READ(*,newrun)
WRITE(*,newrun)
!===========================================================================
! 1.0 Set up grids
!
ALLOCATE( xgrid(0:nx) )
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
!
!!$ dy = 2.d0*pi/REAL(ny)
dy = 1.0d0
ALLOCATE( ygrid(0:ny) )
ygrid(0) = a
ygrid(ny) = b
CALL meshdist(coefy, ygrid, ny)
!===========================================================================
! 2.0 Set up splines on (x,y)
!
CALL set_spline(nidbas, ngauss, xgrid, spx)
CALL set_spline(nidbas, ngauss, ygrid, spy, period=.TRUE.)
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), &
& ':',UBOUND(spx%knots), spx%knots
WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of y-splines', LBOUND(spy%knots), &
& ':',UBOUND(spy%knots), spy%knots
WRITE(*,'(2(a,i5, 2x))') 'NY =', ny, 'DIM =', spy%dim
!===========================================================================
! 3.0 Graph the splines on (x,y)
!
ALLOCATE( fun(nidbas+1,1) ) ! Only 0-th derivative
!!$ ALLOCATE( fun(nidbas+1,0:1) ) ! Only 0-th derivative
ALLOCATE( xp(npts), funxp(npts,0:spx%dim-1) )
ALLOCATE( yp(npts-1), funyp(npts-1,0:spy%dim-1) )
!
! Splines in X (non-peridic)
!
WRITE(*,'(a)') 'Splines in x'
dx = (xgrid(nx)-xgrid(0)) / REAL(NPTS-1)
DO i=1,npts
xp(i) = xgrid(0) + (i-1)*dx
CALL locintv(spx, xp(i), left)
CALL basfun(xp(i), spx, fun, left+1)
funxp(i,left:left+nidbas) = fun(:,1)
END DO
!
! Splines in Y (periodic)
!
WRITE(*,'(a)') 'Splines in y'
dy = (ygrid(ny)-ygrid(0)) / REAL(NPTS-1)
DO i=1,npts-1
yp(i) = ygrid(0) + (i-1)*dy
CALL locintv(spy, yp(i), left)
CALL basfun(yp(i), spy, fun, left+1)
funyp(i,left:left+nidbas) = fun(:,1)
END DO
!
! Create hdf5 file
!
CALL creatf('driv1.h5', fid, real_prec='d')
!
WRITE(title,'(a,i3,5x,a,i6)') 'Splines of degree =', nidbas, 'NX =', nx
CALL putarr(fid, 'X', xp)
CALL putarr(fid, 'KNOTSX', spx%knots)
CALL putarr(fid, 'splinesx', funxp, TRIM(title))
CALL putarr(fid, 'KNOTSY', spy%knots)
!
WRITE(title,'(a,i3,5x,a,i6)') 'Periodic splines of degree =', nidbas, 'NY =', ny
CALL putarr(fid, 'Y', yp)
CALL putarr(fid, 'splinesy', funyp, TRIM(title))
CALL closef(fid)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgrid, ygrid)
DEALLOCATE(xp, funxp)
CALL destroy_sp(spx)
CALL destroy_sp(spy)
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/driv2.f90 b/examples/driv2.f90
index b67b578..720f624 100644
--- a/examples/driv2.f90
+++ b/examples/driv2.f90
@@ -1,180 +1,180 @@
!>
!> @file driv2.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Integration of splines
!
USE bsplines
!
IMPLICIT NONE
TYPE(spline1d) :: spx
INTEGER :: nx, nidbas, ngauss
DOUBLE PRECISION :: a, b, coefx(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fun(:,:), finteg(:)
DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:)
DOUBLE PRECISION :: support, res, err
INTEGER :: i, ig, j, jj, left
INTEGER :: dim, ng
LOGICAL :: periodic
NAMELIST /newrun/ periodic, nx, nidbas, ngauss, a, b, coefx
!===========================================================================
! 1.0 Set up grids
!
! Read in data specific to run
!
periodic = .FALSE.
nidbas = 3
ngauss = 4
nx = 10 ! Number of intevals in x
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid/knots
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!===========================================================================
! 2.0 Set up splines
!
CALL set_spline(nidbas, ngauss, xgrid, spx, periodic)
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), &
& ':',UBOUND(spx%knots), spx%knots
WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim
!===========================================================================
! 3.0 Integrate all splines
!
CALL get_dim(spx, dim)
ALLOCATE( finteg(0:dim-1), xg(ngauss), wg(ngauss), fun(0:nidbas,1) )
finteg = 0.0
fun = 0.0
DO i=1,nx ! Loop thru the intervals
CALL get_gauss(spx, ng, i, xg, wg)
DO ig=1,ng ! Loop thru Gauss points
CALL basfun(xg(ig), spx, fun, i)
left = i-1
DO j=0,nidbas ! Loop thru the splines in this interval
jj = left+j
IF( periodic ) jj = MODULO(left+j, nx)
finteg(jj) = finteg(jj) + wg(ig)*fun(j,1)
END DO
END DO
END DO
!!$ IF( periodic ) THEN
!!$ DO i=nx,dim-1
!!$ finteg(i) = finteg(i-nx)
!!$ END DO
!!$ END IF
!
WRITE(*,'(a/(10f10.5))') 'Integrals of splines', finteg
PRINT*, 'Sum of finteg', SUM(finteg)
!!$ IF( periodic ) THEN
!!$ PRINT*, 'Sum of finteg', SUM(finteg(0:nx-1))
!!$ ELSE
!!$ PRINT*, 'Sum of finteg', SUM(finteg)
!!$ END IF
!
WRITE(*,'(a/(10f10.5))') 'Integrals of splines from module', spx%intspl
PRINT*, 'Sum of finteg', SUM(spx%intspl)
WRITE(*,'(a5,4a12)') '#', 'I', 'S', '(p+1)I/S', '(p+1)I/S-1'
DO i=0,spx%dim-1
support = spx%knots(i+1)-spx%knots(i-nidbas)
res = spx%intspl(i)/support*(nidbas+1)
err = res - 1.0d0
WRITE(*,'(i5,4(1pe12.4))') i, spx%intspl(i), support, res, err
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE( finteg, xg, wg, fun )
DEALLOCATE(xgrid)
CALL destroy_sp(spx)
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/driv3.f90 b/examples/driv3.f90
index 8df6e99..cce04fd 100644
--- a/examples/driv3.f90
+++ b/examples/driv3.f90
@@ -1,159 +1,159 @@
!>
!> @file driv3.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Computation of croos mass matrix between two splines sp1 & sp2
! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or
! set_splcoef) and of any order.
!
USE bsplines
!
IMPLICIT NONE
TYPE(spline1d) :: sp1, sp2
INTEGER :: nx, nidbas1, nidbas2, ngauss
INTEGER :: i, j
DOUBLE PRECISION :: a, b, coefx(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:)
DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat
LOGICAL :: periodic1, periodic2
NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2
!===========================================================================
! 1.0 Set up grids
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
periodic1 = .FALSE.
periodic2 = .FALSE.
nidbas1 = 3
nidbas2 = 2
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid/knots
!
ALLOCATE(xgrid(0:nx))
xgrid(0 ) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
!===========================================================================
! 2.0 Set up splines
!
ngauss = 1 ! Gauss points initialized with set_spline are in fact not used
! for computing cross mass matrix
! First spline set up as for solving a PDE with FEMs
CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1)
! Second spline set up as for interpolation
CALL set_splcoef(nidbas2, xgrid, sp2, periodic2)
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), &
& ':',UBOUND(sp1%knots), sp1%knots
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), &
& ':',UBOUND(sp2%knots), sp2%knots
WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim
!===========================================================================
! 3.0 Compute cross mass matrix
!
CALL CompMassMatrix(sp1, sp2, a, b, MassMat)
WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:"
DO i = 1, SIZE(MassMat, 1)
WRITE(*, "(15f13.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15))
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(MassMat)
DEALLOCATE(xgrid)
CALL destroy_sp(sp1)
CALL destroy_sp(sp2)
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/driv4.f90 b/examples/driv4.f90
index 63711b7..3e7cad4 100644
--- a/examples/driv4.f90
+++ b/examples/driv4.f90
@@ -1,225 +1,225 @@
!>
!> @file driv4.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Computation of croos mass matrix between two splines sp1 & sp2
! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or
! set_splcoef) and of any order.
!
USE bsplines
USE matrix
!
IMPLICIT NONE
TYPE(gbmat) :: matm
INTEGER :: mrows, ncols, kl, ku
DOUBLE PRECISION, ALLOCATABLE :: avec(:,:), bvec(:,:), matfull(:,:)
!
TYPE(zgbmat) :: zmatm
DOUBLE COMPLEX, ALLOCATABLE :: zavec(:,:), zbvec(:,:)
DOUBLE PRECISION :: dznrm2
!
TYPE(spline1d) :: sp1, sp2
INTEGER :: nx, nidbas1, nidbas2, ngauss
INTEGER :: i, j
DOUBLE PRECISION :: a, b, coefx(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:)
DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat
LOGICAL :: periodic1, periodic2
NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2
!===========================================================================
! 1.0 Set up grids
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
periodic1 = .FALSE.
periodic2 = .FALSE.
nidbas1 = 3
nidbas2 = 2
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid/knots
!
ALLOCATE(xgrid(0:nx))
xgrid(0 ) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
!===========================================================================
! 2.0 Set up splines
!
ngauss = 1 ! Gauss points initialized with set_spline are in fact not used
! for computing cross mass matrix
! First spline set up as for solving a PDE with FEMs
CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1)
! Second spline set up as for interpolation
!!$ CALL set_splcoef(nidbas2, xgrid, sp2, periodic2)
CALL set_spline(nidbas2, ngauss, xgrid, sp2, periodic2)
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), &
& ':',UBOUND(sp1%knots), sp1%knots
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), &
& ':',UBOUND(sp2%knots), sp2%knots
WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim
!===========================================================================
! 3.0 Compute cross mass matrix
!
CALL CompMassMatrix(sp1, sp2, a, b, MassMat)
WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:"
DO i = 1, SIZE(MassMat, 1)
WRITE(*, "(15f10.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15))
END DO
!
! Should equal to 1 for splines i "not close to the boundaries":
! p1 .LT. i .LE. N
!
WRITE(*,'(/a/(15f8.5))') 'Sum of cols * NX', SUM(MassMat,dim=2)*REAL(nx,8)
!===========================================================================
! 3.0 Use DGB matrice
!
!!$ mrows = nx+nidbas1
!!$ ncols = nx+nidbas2
CALL get_dim(sp1, mrows)
CALL get_dim(sp2, ncols)
kl = nidbas1
ku = nidbas2
CALL init(kl, ku, ncols, 1, matm, mrows=mrows)
WRITE(*,'(/a, 2i3)') 'Band matrix:, kl, ku =', kl, ku
!
CALL CompMassMatrix(sp1, sp2, a, b, matm)
!
DO i=1,SIZE(matm%val,1)
WRITE(*,'(15f10.5)') matm%val(i,:)
END DO
!
WRITE(*,'(/a)') 'Full matrix'
ALLOCATE(matfull(mrows,ncols))
matfull = 0.0d0
DO i=1,mrows
CALL getrow(matm, i, matfull(i,:))
WRITE(*,'(15f10.5)') matfull(i,:)
END DO
WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(matfull-MassMat))
!
! Check VMX
ALLOCATE(avec(ncols,2))
ALLOCATE(bvec(mrows,2))
avec = 1.0d0
bvec = vmx(matm,avec)*REAL(nx,8)
WRITE(*,'(a)') 'M*a, with a=1'
DO j=1,2
WRITE(*,'(15f8.5)') bvec(:,j)
END DO
!===========================================================================
! 4.0 Test complex version
!
CALL init(kl, ku, ncols, 1, zmatm, mrows=mrows)
CALL CompMassMatrix(sp1, sp2, a, b, zmatm)
ALLOCATE(zavec(ncols,2))
ALLOCATE(zbvec(mrows,2))
zavec = (1.0d0,0.0d0)
zbvec = vmx(zmatm,zavec)*REAL(nx,8)
zbvec = zbvec-bvec
WRITE(*,'(/a)') 'Check complex version'
WRITE(*,'(a,2(1pe12.4))') 'Norm of errors =', &
& (dznrm2(mrows, zbvec(1,j), 1), j=1,2)
zmatm%val = zmatm%val-matm%val
WRITE(*,'(a,1pe12.4)') 'Diff of matrix elements =', MAXVAL(ABS(zmatm%val))
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(MassMat)
DEALLOCATE(xgrid)
CALL destroy_sp(sp1)
CALL destroy_sp(sp2)
call destroy(matm)
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/extra.c b/examples/extra.c
index 572cecd..b406698 100644
--- a/examples/extra.c
+++ b/examples/extra.c
@@ -1,49 +1,49 @@
/**
* @file extra.c
*
* @brief
*
* @copyright
* Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
* SPC (Swiss Plasma Center)
*
- * spclibs is free software: you can redistribute it and/or modify it under
+ * SPClibs is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
- * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
* @authors
* (in alphabetical order)
* @author Trach-Minh Tran
*/
/**********************************************/
#include
void quit_();
void dump_(char *filename, int *l);
void Dump(filename)
char *filename;
{
/* The user's dump routine should go here. */
int l = strlen(filename);
dump_(filename, &l);
} /* End DUMP */
/**********************************************/
void Quit()
{
/* The user's quit routine should go here. */
quit_();
} /* End QUIT */
diff --git a/examples/fit1d.f90 b/examples/fit1d.f90
index db3b786..88ae5e4 100644
--- a/examples/fit1d.f90
+++ b/examples/fit1d.f90
@@ -1,251 +1,251 @@
!>
!> @file fit1d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit grid value function to a spline of any order
!
USE bsplines
USE futils
!
IMPLICIT NONE
INTEGER :: nx, nidbas
DOUBLE PRECISION :: a, b, coefx(5)
!!$ DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), coefs(:)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:)
INTEGER :: i, dim, left
TYPE(spline1d) :: spl
DOUBLE PRECISION :: dx
INTEGER :: npts
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, POINTER :: splines(:,:) => null()
!
CHARACTER(len=128) :: file='fit1d.h5'
INTEGER :: fid
!
NAMELIST /newrun/ nx, nidbas, a, b, coefx
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 10 ! Number of intevals in x
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid and function values
!
ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
fgrid(:,1) = func(xgrid)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'FIT1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
!===========================================================================
! 2.0 Spline interpolation
!
! Set up the spline interpolation
!!$ CALL splcoef_setup(nidbas, xgrid, spl)
CALL set_splcoef(nidbas, xgrid, spl)
PRINT*, 'nlequid =', spl%nlequid
!
! Compute spline values and derivatives at Boundaries
ALLOCATE(fun(nidbas+1,0:nidbas))
WRITE(*,'(/a)') 'spline at the left boundary'
CALL locintv(spl, a, left)
CALL basfun(a, spl, fun, left+1)
DO i=0,nidbas
WRITE(*,'(8(1pe12.4))') fun(:,i)
END DO
!
WRITE(*,'(/a)') 'spline at the right boundary'
CALL locintv(spl, b, left)
CALL basfun(b, spl, fun, left+1)
DO i=0,nidbas
WRITE(*,'(8(1pe12.4))') fun(:,i)
END DO
DEALLOCATE(fun)
!
CALL get_dim(spl, dim)
ALLOCATE(coefs(dim,1))
WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
& ':',UBOUND(spl%knots), spl%knots
!
! From given grid values fgrid, compute the spline coefs
CALL get_splcoef(spl, fgrid, coefs)
WRITE(*,'(a/(10f8.3))') 'coefs', coefs
!
! Plot all splines
npts = 100
ALLOCATE(xpt(npts))
dx = (b-a)/REAL(npts-1)
DO i=1,npts
xpt(i) = a + (i-1)*dx
END DO
CALL allsplines(spl, xpt, splines)
CALL putarr(fid, '/X', xpt)
CALL putarr(fid,'/SPLINES', splines)
!
! Check interpolation
ALLOCATE(fcalc(npts), fexact(npts), err(npts))
fexact = func(xpt)
!
! Function values
CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
err = fexact - fcalc
CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
!
! Derivatives values
CALL gridval(spl, xpt, fcalc, 1)
fexact = func1(xpt)
err = fexact - fcalc
CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
!
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xpt, splines, fexact, fcalc, err)
DEALLOCATE(xgrid, fgrid)
CALL destroy_sp(spl)
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION func(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func(SIZE(x))
!!$ INTEGER :: n
!!$ n = SIZE(x)
!!$ func = 1.d0+x*(1.d0+x*(1.d0+x))
!!$ func(1:n/2) = 1.0d0
!!$ func(n/2+1:n) = 0.5d0
func = EXP(-8.*x*x)
END FUNCTION func
!
FUNCTION func1(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func1(SIZE(x))
!!$ INTEGER :: n
!!$ n = SIZE(x)
!!$ func = 1.d0+x*(1.d0+x*(1.d0+x))
!!$ func(1:n/2) = 1.0d0
!!$ func(n/2+1:n) = 0.5d0
func1 = -16.d0*x*EXP(-8.*x*x)
END FUNCTION func1
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/fit1d_cmpl.f90 b/examples/fit1d_cmpl.f90
index ba3c7db..acae6c0 100644
--- a/examples/fit1d_cmpl.f90
+++ b/examples/fit1d_cmpl.f90
@@ -1,106 +1,106 @@
!>
!> @file fit1d_cmpl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit a 1d complex function
!
USE bsplines
IMPLICIT NONE
INTEGER,PARAMETER :: NX=10, NIDBAS=3, NPTS=40
DOUBLE PRECISION :: pi, dx, xgrid(0:NX), xpt(NPTS), err
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
DOUBLE COMPLEX :: fgrid(0:NX), fexact(NPTS), fcalc(NPTS)
INTEGER :: dim, i
TYPE(spline1d) :: spl
!================================================================================
!
! Define grid and function values on grid
!
pi = 4.0d0*ATAN(1.0d0)
xgrid(0) = 0.0d0
dx = 2.0d0*pi/NX
DO i=1,NX
xgrid(i) = xgrid(0) + i*dx
END DO
!
fgrid = func(xgrid)
!
WRITE(*,'(2a10)') 'x', 'f'
DO i=0,NX
WRITE(*,'(3f10.4)') xgrid(i), fgrid(i)
END DO
!
! Set up spline
!
CALL set_splcoef(NIDBAS, xgrid, spl, period=.TRUE.)
CALL get_dim(spl, dim)
ALLOCATE(coefs(dim))
!
! Get Spline coefficients
!
CALL get_splcoef(spl, fgrid, coefs)
WRITE(*,'(a)') 'Interpolation coefs'
DO i=1,dim
WRITE(*,'(2(1pe12.3))') coefs(i)
END DO
!
! Check interpolation
!
CALL RANDOM_NUMBER(xpt)
xpt = (2.0d0*pi) * xpt
fexact = func(xpt)
!
CALL gridval(spl, xpt, fcalc, 0, coefs)
!
WRITE(*,'(a10,2a20)') 'x', 'fexact', 'fcacl'
DO i=1,NPTS
WRITE(*,'(5f10.4)') xpt(i), fexact(i), fcalc(i)
END DO
err = norm2(fcalc-fexact)
WRITE(*,'(a,1pe12.3)') 'error', err
!
! Clean up
!
DEALLOCATE(coefs)
CALL destroy_sp(spl)
!================================================================================
CONTAINS
FUNCTION func(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE COMPLEX :: func(size(x))
func = EXP( CMPLX(0.0d0, x))
END FUNCTION func
!
FUNCTION norm2(x)
!
! Compute the 2-norm of vector x
!
DOUBLE PRECISION :: norm2
DOUBLE COMPLEX, INTENT(in) :: x(:)
!
norm2 = SQRT(DOT_PRODUCT(x,x))
END FUNCTION norm2
END PROGRAM main
diff --git a/examples/fit1dbc.f90 b/examples/fit1dbc.f90
index 503a469..bb1af10 100644
--- a/examples/fit1dbc.f90
+++ b/examples/fit1dbc.f90
@@ -1,254 +1,254 @@
!>
!> @file fit1dbc.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit grid value function to a spline of any order
! BC using derivatives at both ends.
!
USE bsplines
USE futils
!
IMPLICIT NONE
INTEGER :: nx, nidbas
DOUBLE PRECISION :: a, b, coefx(5)
!!$ DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), coefs(:)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:)
INTEGER :: i, dim
TYPE(spline1d) :: spl
DOUBLE PRECISION :: dx
INTEGER :: npts
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, POINTER :: splines(:,:) => null()
INTEGER :: ibc(2,10)
!!$ DOUBLE PRECISION :: fbc(2,10)
DOUBLE PRECISION :: fbc(2,10,1)
!
CHARACTER(len=128) :: file='fit1d.h5'
INTEGER :: fid
!
NAMELIST /newrun/ nx, nidbas, a, b, coefx, ibc, fbc
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 10 ! Number of intevals in x
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
ibc(1,1:10) = (/2,3,4,5,6,7,8,9,10,11/)
ibc(2,1:10) = ibc(1,1:10)
fbc = 0.0
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid and function values
!
ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
fgrid(:,1) = func(xgrid)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'FIT1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
!===========================================================================
! 2.0 Spline interpolation
!
! Set up the spline interpolation
CALL set_splcoef(nidbas, xgrid, spl, ibc=ibc)
!
! Compute spline values and derivatives at Boundaries
ALLOCATE(fun(nidbas+1,0:nidbas))
WRITE(*,'(/a)') 'spline at the left boundary'
CALL basfun(a, spl, fun, 1)
DO i=0,nidbas
WRITE(*,'(8(1pe12.4))') fun(:,i)
END DO
!
WRITE(*,'(/a)') 'spline at the right boundary'
CALL basfun(b, spl, fun, nx)
DO i=0,nidbas
WRITE(*,'(8(1pe12.4))') fun(:,i)
END DO
DEALLOCATE(fun)
!
CALL get_dim(spl, dim)
ALLOCATE(coefs(dim,1))
WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
& ':',UBOUND(spl%knots), spl%knots
!
! From given grid values fgrid, compute the spline coefs
CALL get_splcoef(spl, fgrid, coefs, fbc)
WRITE(*,'(a/(10f8.3))') 'coefs', coefs
!
! Plot all splines
npts = 100
ALLOCATE(xpt(npts))
dx = (b-a)/REAL(npts-1)
DO i=1,npts
xpt(i) = a + (i-1)*dx
END DO
CALL allsplines(spl, xpt, splines)
CALL putarr(fid, '/X', xpt)
CALL putarr(fid,'/SPLINES', splines)
!
! Check interpolation
ALLOCATE(fcalc(npts), fexact(npts), err(npts))
fexact = func(xpt)
!
! Function values
CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
err = fexact - fcalc
CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
!
! Derivatives values
CALL gridval(spl, xpt, fcalc, 1)
fexact = func1(xpt)
err = fexact - fcalc
CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
!
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xpt, splines, fexact, fcalc, err)
DEALLOCATE(xgrid, fgrid)
CALL destroy_sp(spl)
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION func(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func(SIZE(x))
!!$ INTEGER :: n
!!$ n = SIZE(x)
!!$ func = 1.d0+x*(1.d0+x*(1.d0+x))
!!$ func(1:n/2) = 1.0d0
!!$ func(n/2+1:n) = 0.5d0
func = EXP(-8.*x*x)
END FUNCTION func
!
FUNCTION func1(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func1(SIZE(x))
!!$ INTEGER :: n
!!$ n = SIZE(x)
!!$ func = 1.d0+x*(1.d0+x*(1.d0+x))
!!$ func(1:n/2) = 1.0d0
!!$ func(n/2+1:n) = 0.5d0
func1 = -16.d0*x*EXP(-8.*x*x)
END FUNCTION func1
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/fit1dp.f90 b/examples/fit1dp.f90
index 97ca6be..29082d0 100644
--- a/examples/fit1dp.f90
+++ b/examples/fit1dp.f90
@@ -1,227 +1,227 @@
!>
!> @file fit1dp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit grid value function to a spline of any order
! Periodic case.
!
USE bsplines
USE futils
!
IMPLICIT NONE
INTEGER :: nx, nidbas
DOUBLE PRECISION :: a, b, coefx(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:)
INTEGER :: i, dim
TYPE(spline1d) :: spl
DOUBLE PRECISION :: dx, x0, x1
INTEGER :: npts
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
DOUBLE PRECISION, POINTER :: splines(:,:) => null()
!
CHARACTER(len=128) :: file='fit1d.h5'
INTEGER :: fid
!
NAMELIST /newrun/ nx, nidbas, a, b, coefx
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 10 ! Number of intevals in x
a = 0.0d0 ! Left boundary of interval
b = 1.0d0 ! Right boundary of interval
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid and function values
!
ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefx, xgrid, nx)
fgrid(:,1) = func(xgrid)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'FIT1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
!===========================================================================
! 2.0 Spline interpolation
!
! Set up the spline interpolation
CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
CALL get_dim(spl, dim)
WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
& ':',UBOUND(spl%knots), spl%knots
!
ALLOCATE(coefs(dim,1))
!
! From given grid values fgrid, compute the spline coefs
CALL get_splcoef(spl, fgrid, coefs)
WRITE(*,'(a/(10f8.3))') 'coefs', coefs
!
! Plot all splines
!
npts = 100
ALLOCATE(xpt(npts))
!!$ x0 = a
!!$ x1 = b
x0 = spl%knots(0)
x1 = spl%knots(nx)
dx = (x1 -x0)/REAL(npts) ! Last point b not inluded
DO i=1,npts
xpt(i) = x0 + (i-1)*dx
END DO
CALL allsplines(spl, xpt, splines)
CALL putarr(fid, '/X', xpt)
CALL putarr(fid,'/SPLINES', splines)
!
! Check interpolation
!
ALLOCATE(fcalc(npts), fexact(npts), err(npts))
fexact = func(xpt)
!
CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
err = fexact - fcalc
CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
!
! Derivatives values
CALL gridval(spl, xpt, fcalc, 1)
fexact = func1(xpt)
err = fexact - fcalc
CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xpt, splines, fexact, fcalc, err)
DEALLOCATE(xgrid, fgrid)
CALL destroy_sp(spl)
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION func(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func(SIZE(x))
DOUBLE PRECISION :: pi
pi = 4.0*ATAN(1.0d0)
func = SIN(2.d0*pi*x) + 2.0d0*COS(8.d0*pi*x)
END FUNCTION func
FUNCTION func1(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func1(SIZE(x))
DOUBLE PRECISION :: pi
pi = 4.0*ATAN(1.0d0)
func1 = 2.d0*pi*COS(2.d0*pi*x) - 16.0d0*pi*SIN(8.d0*pi*x)
END FUNCTION func1
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/fit2d.f90 b/examples/fit2d.f90
index ab598de..563cd26 100644
--- a/examples/fit2d.f90
+++ b/examples/fit2d.f90
@@ -1,159 +1,159 @@
!>
!> @file fit2d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit 2d grid value function to a 2d spline of any order
!
USE bsplines
USE futils
!
IMPLICIT NONE
CHARACTER(len=128) :: file='fit2d.h5'
INTEGER :: fid
INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
INTEGER, PARAMETER :: nptx=100, npty=100
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
TYPE(spline2d) :: splxy
DOUBLE PRECISION :: mem
INTEGER :: i, j
!
NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
ny = 8 ! Number of intevals in y
nidbas = (/3,3/) ! Degree of splines
mbes = 2
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
fgrid = func(xgrid,ygrid)
!
WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
IF( nx.LE.10 .AND. ny.LE.10 ) THEN
WRITE(*,'(a)') 'FGRID'
DO j=0,ny
WRITE(*,'(12f8.3)') fgrid(:,j)
END DO
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
END IF
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MBESS', mbes)
!===========================================================================
! 2.0 Spline interpolation
!
! Setup the spline interpolation
CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
!
! Compute spline interpolation coefficients
CALL get_dim(splxy, dims)
ALLOCATE(bcoefs(dims(1),dims(2)))
WRITE(*,'(a,2i4)') 'Dims of spline', dims
!
CALL get_splcoef(splxy, fgrid, bcoefs)
!===========================================================================
! 2.0 Check interpolation
!
dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
DO i=1,nptx
xpt(i) = xgrid(0) + (i-1)*dx
END DO
DO i=1,npty
ypt(i) = ygrid(0) + (i-1)*dy
END DO
fexact = func(xpt,ypt)
CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
errs = fcalc-fexact
WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
!
CALL putarr(fid, '/xpt', xpt, 'r')
CALL putarr(fid, '/ypt', ypt, '\theta')
CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
CALL putarr(fid, '/fexact', fexact, 'Exact')
CALL putarr(fid, '/errs', errs, 'Errors')
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoefs)
DEALLOCATE(xgrid, ygrid, fgrid)
CALL destroy_sp(splxy)
CALL closef(fid)
!
CONTAINS
FUNCTION func(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * SIN(mbes*y(j))
func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
END DO
END FUNCTION func
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
diff --git a/examples/fit2d1d.f90 b/examples/fit2d1d.f90
index e0ffec3..23a919f 100644
--- a/examples/fit2d1d.f90
+++ b/examples/fit2d1d.f90
@@ -1,154 +1,154 @@
!>
!> @file fit2d1d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit 2d grid value function to a 2d spline of any order
! Interpolating on an grid (x_i,y_j) or a set of particle
! positions (x_p,y_p).
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
INTEGER, PARAMETER :: nptx=100, npty=100
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
DOUBLE PRECISION, DIMENSION(nptx*npty) :: xp, yp, fcalcp, fexactp, errsp
TYPE(spline2d) :: splxy
DOUBLE PRECISION :: mem
INTEGER :: i, j
!
NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
ny = 8 ! Number of intevals in y
nidbas = (/3,3/) ! Degree of splines
mbes = 2
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
fgrid = func(xgrid,ygrid)
!===========================================================================
! 2.0 Spline interpolation
!
! Setup the spline interpolation
CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
!
! Compute spline interpolation coefficients
CALL get_dim(splxy, dims)
ALLOCATE(bcoefs(dims(1),dims(2)))
WRITE(*,'(a,2i4)') 'Dims of spline', dims
!
CALL get_splcoef(splxy, fgrid, bcoefs)
!===========================================================================
! 2.0 Check interpolation
!
dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
DO i=1,nptx
xpt(i) = xgrid(0) + (i-1)*dx
END DO
DO i=1,npty
ypt(i) = ygrid(0) + (i-1)*dy
END DO
fexact = func(xpt,ypt)
CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
errs = fcalc-fexact
WRITE(*,*) 'Using the GRIDVAL2D2D'
WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
!
! The 2d1d version
WRITE(*,*) 'Using the GRIDVAL2D1D'
CALL RANDOM_NUMBER(xp)
CALL RANDOM_NUMBER(yp)
yp=2.0*pi*yp
fexactp = func1(xp,yp)
!!$ CALL gridval(splxy, xp, yp, fcalcp, (/0,0/), bcoefs)
CALL gridval(splxy, xp, yp, fcalcp, (/0,0/))
errsp = fcalcp-fexactp
WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errsp), MAXVAL(errsp)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoefs)
DEALLOCATE(xgrid, ygrid, fgrid)
CALL destroy_sp(splxy)
!
CONTAINS
FUNCTION func(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * SIN(mbes*y(j))
func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
END DO
END FUNCTION func
FUNCTION func1(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func1(SIZE(x))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(x)
zy = -mbes * SIN(mbes*y(j))
func1(j) =(1-x(j)**2) * x(j)**mbes * zy
END DO
END FUNCTION func1
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
diff --git a/examples/fit2d_cmpl.f90 b/examples/fit2d_cmpl.f90
index 6e6a4ee..4436ed0 100644
--- a/examples/fit2d_cmpl.f90
+++ b/examples/fit2d_cmpl.f90
@@ -1,132 +1,132 @@
!>
!> @file fit2d_cmpl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit 2d grid value function to a 2d spline of any order
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
INTEGER, PARAMETER :: npt=10000
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:)
DOUBLE COMPLEX, ALLOCATABLE :: fgrid(:,:), fgrid_calc(:,:), bcoefs(:,:)
DOUBLE PRECISION :: dx, dy, xpt(npt), ypt(npt), errs(npt)
DOUBLE COMPLEX, DIMENSION(npt) :: fcalc, fexact
TYPE(spline2d) :: splxy
INTEGER :: i, j
!
NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
ny = 8 ! Number of intevals in y
nidbas = (/3,3/) ! Degree of splines
mbes = 2
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
ALLOCATE(fgrid(0:nx,0:ny), fgrid_calc(0:nx,0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
fgrid = func2(xgrid,ygrid)
!===========================================================================
! 2.0 Spline interpolation
!
! Setup the spline interpolation
CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
!
! Compute spline interpolation coefficients
CALL get_dim(splxy, dims)
ALLOCATE(bcoefs(dims(1),dims(2)))
WRITE(*,'(a,2i4)') 'Dims of spline', dims
!
CALL get_splcoef(splxy, fgrid, bcoefs)
!===========================================================================
! 2.0 Check interpolation
!
CALL RANDOM_NUMBER(xpt)
CALL RANDOM_NUMBER(ypt)
ypt(:) = ypt(:)*2.0*pi
fexact = func1(xpt,ypt)
!
CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
errs = ABS(fcalc-fexact)
WRITE(*,'(a,2(1pe12.3))') 'Max errors (on random points)', MAXVAL(errs)
!
CALL gridval(splxy, xgrid, ygrid, fgrid_calc, (/0,0/))
WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', &
& MAXVAL(ABS(fgrid_calc-fgrid))
!
fgrid_calc = 0.0
DO j=0,ny
ypt(1:nx+1) = ygrid(j)
CALL gridval(splxy, xgrid, ypt(1:nx+1), fgrid_calc(:,j), (/0,0/))
END DO
WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', &
& MAXVAL(ABS(fgrid_calc-fgrid))
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoefs)
DEALLOCATE(xgrid, ygrid, fgrid, fgrid_calc)
CALL destroy_sp(splxy)
!
CONTAINS
FUNCTION func2(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE COMPLEX :: func2(SIZE(x), SIZE(y))
DOUBLE COMPLEX :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j)))
func2(:,j) =(1-x(:)**2) * x(:)**mbes * zy
END DO
END FUNCTION func2
FUNCTION func1(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE COMPLEX :: func1(SIZE(x))
DOUBLE COMPLEX :: zy
INTEGER :: j
DO j=1,SIZE(x)
zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j)))
func1(j) =(1-x(j)**2) * x(j)**mbes * zy
END DO
END FUNCTION func1
END PROGRAM main
diff --git a/examples/fit2dbc.f90 b/examples/fit2dbc.f90
index 75862c0..cae0d98 100644
--- a/examples/fit2dbc.f90
+++ b/examples/fit2dbc.f90
@@ -1,183 +1,183 @@
!>
!> @file fit2dbc.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit 2d grid value function to a 2d spline of any order
! BC using derivatives at both ends, in the non-periodic direction.
!
USE bsplines
USE futils
!
IMPLICIT NONE
CHARACTER(len=128) :: file='fit2d.h5'
INTEGER :: fid
INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
INTEGER, PARAMETER :: nptx=100, npty=100
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
TYPE(spline2d) :: splxy
DOUBLE PRECISION :: mem
INTEGER :: i, j, ii
INTEGER :: ibc1(2,10), ibc2(2,10)
DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
!
NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
ny = 8 ! Number of intevals in y
nidbas = (/3,3/) ! Degree of splines
mbes = 2
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 1.0d0
CALL meshdist(coefy, ygrid, ny)
fgrid = func(xgrid,ygrid)
!
WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
IF( nx.LE.10 .AND. ny.LE.10 ) THEN
WRITE(*,'(a)') 'FGRID'
DO j=0,ny
WRITE(*,'(12f8.3)') fgrid(:,j)
END DO
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
END IF
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MBESS', mbes)
!===========================================================================
! 2.0 Spline interpolation
!
! Setup the spline interpolation
ii=1 ! Start with first derivative
DO i = 1, nidbas(1)/2
ibc1(1,i) = ii+i-1
ibc1(2,i) = ii+i-1
END DO
ibc2 = ibc1
CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .FALSE./),&
& ibc1=ibc1, ibc2=ibc2)
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
!
! Compute spline interpolation coefficients
CALL get_dim(splxy, dims)
ALLOCATE(bcoefs(dims(1),dims(2)))
WRITE(*,'(a,2i4)') 'Dims of spline', dims
!
ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
fbc=0.0d0
!
WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
!
CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc, fbc2=fbc)
!
DEALLOCATE(fbc)
!===========================================================================
! 2.0 Check interpolation
!
dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
DO i=1,nptx
xpt(i) = xgrid(0) + (i-1)*dx
END DO
DO i=1,npty
ypt(i) = ygrid(0) + (i-1)*dy
END DO
fexact = func(xpt,ypt)
CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
errs = fcalc-fexact
WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
!
CALL putarr(fid, '/xpt', xpt, 'r')
CALL putarr(fid, '/ypt', ypt, '\theta')
CALL putarr(fid, '/bcoefs', bcoefs, 'bcoefs')
CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
CALL putarr(fid, '/fexact', fexact, 'Exact')
CALL putarr(fid, '/errs', errs, 'Errors')
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoefs)
DEALLOCATE(xgrid, ygrid, fgrid)
CALL destroy_sp(splxy)
CALL closef(fid)
!
!===========================================================================
CONTAINS
FUNCTION func(x,y)
!
! A function with zeo derivatives at both ends
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = y(j)*y(j)*(y(j)-1.5d0)
func(:,j) = x(:)*x(:)*(x(:)-1.5d0) + zy
END DO
END FUNCTION func
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
diff --git a/examples/fit2dbc_x.f90 b/examples/fit2dbc_x.f90
index 8795668..bf64d80 100644
--- a/examples/fit2dbc_x.f90
+++ b/examples/fit2dbc_x.f90
@@ -1,202 +1,202 @@
!>
!> @file fit2dbc_x.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit 2d grid value function to a 2d spline of any order
! BC using derivatives at both ends, in the non-periodic direction.
!
! Testing BC on derivative along first direction
!
USE bsplines
USE futils
!
IMPLICIT NONE
CHARACTER(len=128) :: file='fit2d.h5'
INTEGER :: fid
INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
INTEGER, PARAMETER :: nptx=100, npty=100
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
TYPE(spline2d) :: splxy
DOUBLE PRECISION :: mem
INTEGER :: i, j, ii
INTEGER :: ibc(2,10)
DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
!
NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
ny = 8 ! Number of intevals in y
nidbas = (/3,3/) ! Degree of splines
mbes = 2
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
fgrid = func(xgrid,ygrid)
!
WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
IF( nx.LE.10 .AND. ny.LE.10 ) THEN
WRITE(*,'(a)') 'FGRID(x, y)'
DO j=0,ny
WRITE(*,'(12f8.3)') fgrid(:,j)
END DO
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
END IF
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MBESS', mbes)
!===========================================================================
! 2.0 Spline interpolation
!
! Setup the spline interpolation
ii=1 ! Start with first derivative
DO i = 1, nidbas(1)/2
ibc(1,i) = ii+i-1
ibc(2,i) = ii+i-1
END DO
CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./),&
& ibc1=ibc)
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
!
! Compute spline interpolation coefficients
CALL get_dim(splxy, dims)
ALLOCATE(bcoefs(dims(1),dims(2)))
WRITE(*,'(a,2i4)') 'Dims of spline', dims
!
ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
fbc=0.0d0
!
!!$! Exact first derivatives at boundaries
!!$ fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny))
!!$ fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny))
!
!!$! Derivatives at boundaries approximated with FD
!!$ DO j=0,ny
!!$ fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j)
!!$ fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j)
!!$ END DO
!!$ fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0))
!!$ fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1))
!
WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
!
CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc)
!
DEALLOCATE(fbc)
!===========================================================================
! 2.0 Check interpolation
!
dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
DO i=1,nptx
xpt(i) = xgrid(0) + (i-1)*dx
END DO
DO i=1,npty
ypt(i) = ygrid(0) + (i-1)*dy
END DO
fexact = func(xpt,ypt)
CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
errs = fcalc-fexact
WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
!
CALL putarr(fid, '/xpt', xpt, 'r')
CALL putarr(fid, '/ypt', ypt, '\theta')
CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
CALL putarr(fid, '/fexact', fexact, 'Exact')
CALL putarr(fid, '/errs', errs, 'Errors')
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoefs)
DEALLOCATE(xgrid, ygrid, fgrid)
CALL destroy_sp(splxy)
CALL closef(fid)
!
!===========================================================================
CONTAINS
FUNCTION func(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * SIN(mbes*y(j))
func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
END DO
END FUNCTION func
FUNCTION func1(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func1(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * SIN(mbes*y(j))
func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy
END DO
END FUNCTION func1
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
diff --git a/examples/fit2dbc_y.f90 b/examples/fit2dbc_y.f90
index ff73d65..08b4dcb 100644
--- a/examples/fit2dbc_y.f90
+++ b/examples/fit2dbc_y.f90
@@ -1,202 +1,202 @@
!>
!> @file fit2dbc_y.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Fit 2d grid value function to a 2d spline of any order
! BC using derivatives at both ends, in the non-periodic direction.
!
! Testing BC on derivative along second direction
!
USE bsplines
USE futils
!
IMPLICIT NONE
CHARACTER(len=128) :: file='fit2d.h5'
INTEGER :: fid
INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
INTEGER, PARAMETER :: nptx=100, npty=100
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
DOUBLE PRECISION, DIMENSION(npty,nptx) :: fcalc, fexact, errs
TYPE(spline2d) :: splxy
DOUBLE PRECISION :: mem
INTEGER :: i, j, ii
INTEGER :: ibc(2,10)
DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
!
NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intevals in x
ny = 8 ! Number of intevals in y
nidbas = (/3,3/) ! Degree of splines
mbes = 2
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:ny,0:nx))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
fgrid = TRANSPOSE(func(xgrid,ygrid))
!
WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
IF( nx.LE.10 .AND. ny.LE.10 ) THEN
WRITE(*,'(a)') 'FGRID(y, x)'
DO j=0,ny
WRITE(*,'(12f8.3)') fgrid(j,:)
END DO
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
END IF
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MBESS', mbes)
!===========================================================================
! 2.0 Spline interpolation
!
! Setup the spline interpolation
ii=1 ! Start with first derivative
DO i = 1, nidbas(1)/2
ibc(1,i) = ii+i-1
ibc(2,i) = ii+i-1
END DO
CALL set_splcoef((/nidbas(2), nidbas(1)/), ygrid, xgrid, splxy, (/.TRUE., .FALSE./),&
& ibc2=ibc)
WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
!
! Compute spline interpolation coefficients
CALL get_dim(splxy, dims)
ALLOCATE(bcoefs(dims(1),dims(2)))
WRITE(*,'(a,2i4)') 'Dims of spline', dims
!
ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
fbc=0.0d0
!
!!$! Exact first derivatives at boundaries
!!$ fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny))
!!$ fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny))
!
!!$! Derivatives at boundaries approximated with FD
!!$ DO j=0,ny
!!$ fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j)
!!$ fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j)
!!$ END DO
!!$ fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0))
!!$ fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1))
!
WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
!
CALL get_splcoef(splxy, fgrid, bcoefs, fbc2=fbc)
!
DEALLOCATE(fbc)
!===========================================================================
! 2.0 Check interpolation
!
dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
DO i=1,nptx
xpt(i) = xgrid(0) + (i-1)*dx
END DO
DO i=1,npty
ypt(i) = ygrid(0) + (i-1)*dy
END DO
fexact = TRANSPOSE(func(xpt,ypt))
CALL gridval(splxy, ypt, xpt, fcalc, (/0,0/), bcoefs)
errs = fcalc-fexact
WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
!
CALL putarr(fid, '/xpt', xpt, 'r')
CALL putarr(fid, '/ypt', ypt, '\theta')
CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
CALL putarr(fid, '/fexact', fexact, 'Exact')
CALL putarr(fid, '/errs', errs, 'Errors')
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoefs)
DEALLOCATE(xgrid, ygrid, fgrid)
CALL destroy_sp(splxy)
CALL closef(fid)
!
!===========================================================================
CONTAINS
FUNCTION func(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * SIN(mbes*y(j))
func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
END DO
END FUNCTION func
FUNCTION func1(x,y)
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: func1(SIZE(x), SIZE(y))
DOUBLE PRECISION :: zy
INTEGER :: j
DO j=1,SIZE(y)
zy = -mbes * SIN(mbes*y(j))
func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy
END DO
END FUNCTION func1
!
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
INTEGER :: i
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
sum2 = sum2 + x(i)**2
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
END PROGRAM main
diff --git a/examples/getgrad_perf.f90 b/examples/getgrad_perf.f90
index 0a168c9..79a22f0 100644
--- a/examples/getgrad_perf.f90
+++ b/examples/getgrad_perf.f90
@@ -1,221 +1,221 @@
!>
!> @file getgrad_perf.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test and compare performance of using "spline" and
! "pp" forms. 2D case
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, ny, ngauss(2), nidbas(2), nits
INTEGER :: npt, d1, d2
INTEGER :: i, j, its, ngroup=4
INTEGER :: i1, i2, nset, nremain
DOUBLE PRECISION :: pi, dx, dy
DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fgrid00(:), fgrid01(:), fgrid10(:)
DOUBLE PRECISION, ALLOCATABLE :: fgrad00(:), fgrad01(:), fgrad10(:)
TYPE(spline2d) :: splxy
!
NAMELIST /newrun/ nx, ny, nidbas, npt, nits
!===============================================================================
! 0.0 Prologue
!
! 2D grid
!
nx = 8
ny = 8
nidbas = (/ 3, 3 /)
npt = 100000
nits =100
READ(*,newrun)
WRITE(*,newrun)
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
dy = 2.0d0*pi/REAL(ny)
ygrid = (/ (j*dy,j=0,ny) /)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!
! Set up spline
!
ngauss = 4
CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
d1 = splxy%sp1%dim
d2 = splxy%sp2%dim
WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!
ALLOCATE(xpt(npt), ypt(npt))
CALL RANDOM_NUMBER(xpt)
CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
!
ALLOCATE(coefs(d1,d2))
ALLOCATE(fgrad00(npt), fgrad01(npt), fgrad10(npt))
ALLOCATE(fgrid00(npt), fgrid01(npt), fgrid10(npt))
!
!===============================================================================
! 1.0 PPFORM
!
coefs = 1.0d0 ! => f=1, all derivatives = 0!
!
splxy%sp1%nlppform = .TRUE.
splxy%sp2%nlppform = .TRUE.
CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs)
!
! Vector GRIDVAL
WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
ngroup = 1
DO WHILE (ngroup.LT.npt/2)
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
t0 = seconds()
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
DO its=1,nits
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/))
END DO
END DO
t1 = seconds()-t0
tvec = t1/REAL(npt*nits)
IF(ngroup.EQ.1) tscal=tvec
WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
ngroup = 2*ngroup
END DO
WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL PPFORM: Max errors', &
& MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10))
!
! Vector GETGRAD
WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
ngroup = 1
DO WHILE (ngroup.LT.npt/2)
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
t0 = seconds()
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
DO its=1,nits
CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), &
& fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2))
END DO
END DO
t1 = seconds()-t0
tvec = t1/REAL(npt*nits)
IF(ngroup.EQ.1) tscal=tvec
WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
ngroup = 2*ngroup
END DO
WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD PPFORM: Max errors', &
& MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), &
& MAXVAL(ABS(fgrad10-fgrid10))
!===============================================================================
! 2.0 Spline expansion
!
coefs = 1.0d0 ! => f=1, all derivatives = 0!
!
splxy%sp1%nlppform = .FALSE.
splxy%sp2%nlppform = .FALSE.
CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs)
!
! Vector GRIDVAL
WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
ngroup = 1
DO WHILE (ngroup.LT.npt/2)
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
t0 = seconds()
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
DO its=1,nits
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/))
END DO
END DO
t1 = seconds()-t0
tvec = t1/REAL(npt*nits)
IF(ngroup.EQ.1) tscal=tvec
WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
ngroup = 2*ngroup
END DO
WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL SPLINE: Max errors', &
& MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10))
!
! Vector GETGRAD
WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
ngroup = 1
DO WHILE (ngroup.LT.npt/2)
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
t0 = seconds()
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
DO its=1,nits
CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), &
& fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2))
END DO
END DO
t1 = seconds()-t0
tvec = t1/REAL(npt*nits)
IF(ngroup.EQ.1) tscal=tvec
WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
ngroup = 2*ngroup
END DO
WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD SPLINE: Max errors', &
& MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), &
& MAXVAL(ABS(fgrad10-fgrid10))
!===============================================================================
!
! Clean up
!
CALL destroy_sp(splxy)
DEALLOCATE(xgrid, ygrid, coefs)
DEALLOCATE(xpt, ypt, fgrid00, fgrid01, fgrid10)
DEALLOCATE(fgrad00, fgrad01, fgrad10)
END PROGRAM main
diff --git a/examples/gridval_perf.f90 b/examples/gridval_perf.f90
index d098531..f8d7e5d 100644
--- a/examples/gridval_perf.f90
+++ b/examples/gridval_perf.f90
@@ -1,194 +1,194 @@
!>
!> @file gridval_perf.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test and compare performance of using "spline" and
! "pp" forms. 2D case
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, ny, ngauss(2), nidbas(2), nits
INTEGER :: npt, d1, d2
INTEGER :: i, j, its, ngroup=4
INTEGER :: i1, i2, nset, nremain
DOUBLE PRECISION :: pi, dx, dy
DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:)
DOUBLE PRECISION, ALLOCATABLE :: fscal00(:), fscal01(:), fscal10(:)
TYPE(spline2d) :: splxy
!
NAMELIST /newrun/ nx, ny, nidbas, npt, nits
!===============================================================================
! 0.0 Prologue
!
! 2D grid
!
nx = 8
ny = 8
nidbas = (/ 3, 3 /)
npt = 100000
nits =100
READ(*,newrun)
WRITE(*,newrun)
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
dy = 2.0d0*pi/REAL(ny)
ygrid = (/ (j*dy,j=0,ny) /)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!
! Set up spline
!
ngauss = 4
CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
d1 = splxy%sp1%dim
d2 = splxy%sp2%dim
WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!===============================================================================
! 1.0 PPFORM
!
ALLOCATE(xpt(npt), ypt(npt))
CALL RANDOM_NUMBER(xpt)
CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
!
ALLOCATE(coefs(d1,d2))
ALLOCATE(fscal00(npt), fscal01(npt), fscal10(npt))
ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt))
!
coefs = 1.0d0 ! => f=1, all derivatives = 0!
!
splxy%sp1%nlppform = .TRUE.
splxy%sp2%nlppform = .TRUE.
CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs)
!
! Scalar PPFORM
t0 = seconds()
DO i=1,npt
DO its=1,nits
CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/))
CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/))
CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/))
END DO
END DO
t1 = seconds()-t0
tscal = t1/REAL(npt*nits,8)
WRITE(*,'(/a,3(1pe12.3))') 'Scalar PPFORM: Max errors', &
& MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal
!
! Vector PPFORM
WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
ngroup = 1
DO WHILE (ngroup.LT.npt/2)
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
t0 = seconds()
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
DO its=1,nits
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/))
END DO
END DO
t1 = seconds()-t0
tvec = t1/REAL(npt*nits)
WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
ngroup = 2*ngroup
END DO
WRITE(*,'(/a,3(1pe12.3))') 'Vector PPFORM: Max errors', &
& MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10))
!===============================================================================
! 2.0 Sline expansion
!
coefs = 1.0d0 ! => f=1, all derivatives = 0!
!
splxy%sp1%nlppform = .FALSE.
splxy%sp2%nlppform = .FALSE.
CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs)
!
! Scalar SPLINE
t0 = seconds()
DO i=1,npt
DO its=1,nits
CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/))
CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/))
CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/))
END DO
END DO
t1 = seconds()-t0
tscal = t1/REAL(npt*nits,8)
WRITE(*,'(/a,3(1pe12.3))') 'Scalar SPLINE: Max errors', &
& MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal
!
! Vector SPLINE
WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
ngroup = 1
DO WHILE (ngroup.LT.npt/2)
nset = npt/ngroup
nremain = MODULO(npt, ngroup)
IF(nremain.NE.0) nset = nset+1
t0 = seconds()
i2 = 0
DO i=1,nset
i1 = i2+1
i2 = MIN(i2+ngroup,npt)
DO its=1,nits
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/))
CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/))
END DO
END DO
t1 = seconds()-t0
tvec = t1/REAL(npt*nits)
WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
ngroup = 2*ngroup
END DO
WRITE(*,'(/a,3(1pe12.3))') 'Vector SPLINE: Max errors', &
& MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10))
!===============================================================================
!
! Clean up
!
CALL destroy_sp(splxy)
DEALLOCATE(xgrid, ygrid, coefs)
DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10)
DEALLOCATE(fscal00, fscal01, fscal10)
END PROGRAM main
diff --git a/examples/gyro.f90 b/examples/gyro.f90
index d5a79e8..3c7cb9f 100644
--- a/examples/gyro.f90
+++ b/examples/gyro.f90
@@ -1,179 +1,179 @@
!>
!> @file gyro.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Gyro-average using splines.
! F(x)=cos(x) => \bar F(x,rho) = J_0(rho) cos(x)
!
USE bsplines
USE futils
!
IMPLICIT NONE
INTEGER, PARAMETER :: nx=10, nidbas=3, dim=nx+nidbas, npt=100, &
& nrho=21, nnq=5
DOUBLE PRECISION :: xgrid(0:nx), fgrid(0:nx), coefs(dim)
DOUBLE PRECISION :: xpt(npt), fcalc(npt), fexact(npt)
DOUBLE PRECISION :: averf(0:nx), averfexact(0:nx)
DOUBLE PRECISION, POINTER :: splines(:,:)
TYPE(spline1d) :: spl
DOUBLE PRECISION :: pi, twopi, dx, lperiod, dth
DOUBLE PRECISION :: drho, rho(nrho), erraver(nrho,nnq)
INTEGER :: i, j, nq(nnq)
DOUBLE PRECISION :: dbesj0
!
CHARACTER(len=128) :: file='gyro.h5'
INTEGER :: fid
!
INTERFACE
SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf)
USE bsplines
TYPE(spline1d) :: spl
DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho
INTEGER, INTENT(in) :: nq
DOUBLE PRECISION, INTENT(out) :: averf(0:)
END SUBROUTINE gyro
END INTERFACE
!
pi = 4.0d0*ATAN(1.0d0)
twopi = 2.0d0*pi
!
! Create hdf5 file
CALL creatf(file, fid, 'gyro Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
!
! Grid and function values
dx = twopi/nx
xgrid(0) = 0.0d0
DO i=1,nx
xgrid(i) = xgrid(0) + i*dx
END DO
lperiod = xgrid(nx)-xgrid(0)
fgrid = func(xgrid)
!
! Spline interpolation
CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
CALL get_splcoef(spl, fgrid, coefs)
WRITE(*,'(a)') 'Spline coefficients'
WRITE(*,'(i5,f12.4)') (i-1, coefs(i), i=1,dim)
!
! Error of interpolation
CALL RANDOM_NUMBER(xpt)
xpt = twopi*xpt
CALL gridval(spl, xpt, fcalc, 0, coefs)
fexact = func(xpt)
!!$ WRITE(*,'(a)') 'Interpolated and exact f'
!!$ WRITE(*,'(3(1pe12.3))') (xpt(i), fcalc(i), fexact(i), i=1,npt)
WRITE(*,'(a,1pe12.3)') 'Interpolation error', norm2(fcalc-fexact)
CALL putarr(fid, '/X', xpt)
CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
!
! Gyro-averaged of F at grid points xgrid(0:nx-1)
drho = 5.0d0/nrho
DO i=1,nrho
rho(i) = i*drho
DO j=1,nnq
nq(j) = 2**(j+1)
CALL gyro(spl, xgrid, coefs, rho(i), nq(j), averf)
averfexact = dbesj0(rho(i))*COS(xgrid)
erraver(i,j) = norm2(averfexact-averf)
END DO
END DO
!
WRITE(*,'(a,f8.3,i5)') 'averaged f at rho, nq =', rho(nrho), nq(nnq)
WRITE(*,'(3(1pe12.3))') (xgrid(i),averf(i),averfexact(i), i=0,nx)
CALL putarr(fid, '/XGRID', xgrid)
CALL putarr(fid, '/AVERF', averf, 'Averaged F')
CALL putarr(fid, '/AVERFEXACT', averfexact, 'Averaged F exact')
!
CALL putarr(fid, '/RHO', rho)
CALL putarr(fid, '/NQ', nq)
CALL putarr(fid, '/ERRAVER', erraver)
!
! Clean up
CALL destroy_sp(spl)
CALL closef(fid)
CONTAINS
FUNCTION func(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: func(SIZE(x))
func = COS(x)
END FUNCTION func
FUNCTION norm2(x)
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:)
norm2 = SQRT(DOT_PRODUCT(x,x))
END FUNCTION norm2
END PROGRAM main
!
SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf)
!
! Gyro-average using spline SPL and NQ point quadratire for
! theta-integration.
!
USE bsplines
IMPLICIT NONE
TYPE(spline1d) :: spl
DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho
INTEGER, INTENT(in) :: nq
DOUBLE PRECISION, INTENT(out) :: averf(0:)
!
DOUBLE PRECISION :: th(nq), wth(nq), xq(nq)
DOUBLE PRECISION, ALLOCATABLE :: avermat(:,:)
DOUBLE PRECISION :: pi, twopi, lperiod, dth
DOUBLE PRECISION, POINTER :: splines(:,:)
INTEGER :: i, j, iq, nx, dim
!
pi = 4.0d0*ATAN(1.0d0)
twopi = 2.0d0*pi
!
! Quadrature in theta
dth = twopi/nq
th(1) = -pi + dth/2.0d0
DO iq=2,nq
th(iq) = th(iq-1)+dth
END DO
wth = dth
!
! Gyro-averaging matrix
CALL get_dim(spl, dim, nx)
lperiod = xgrid(nx)-xgrid(0)
ALLOCATE(avermat(0:nx,dim))
DO i=0,nx
xq = xgrid(i) + rho*COS(th)
xq = xgrid(0) + MODULO(xq-xgrid(0), lperiod)
CALL allsplines(spl, xq, splines)
DO j=1,dim
avermat(i,j) = DOT_PRODUCT(wth, splines(:,j))/twopi
END DO
END DO
!
! Gyro-averaged of F at grid points xgrid(0:nx)
averf = MATMUL(avermat, coefs)
!
DEALLOCATE(avermat)
END SUBROUTINE gyro
diff --git a/examples/ibcmat.f90 b/examples/ibcmat.f90
index d48ea56..12ec56f 100644
--- a/examples/ibcmat.f90
+++ b/examples/ibcmat.f90
@@ -1,176 +1,176 @@
!>
!> @file ibcmat.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
USE matrix
IMPLICIT NONE
TYPE(gbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: kl, ku, nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
INTEGER :: i0, ii
INTEGER :: i0_arr(ny), col(ny)
!===========================================================================
! 1.0 Prologue
!
kl = mat%kl
ku = mat%ku
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!
i0 = nrank - ku
WRITE(*,'(a,i6)') 'Estimated i0', i0
DO i=1,ny
arr = 0.0d0
col(i) = nrank-ny+i
CALL getcol(mat, nrank-ny+i, arr)
DO ii=1,nrank
i0_arr(i)=ii
IF(arr(ii) .NE. 0.0d0) EXIT
END DO
END DO
!!$ WRITE(*,'(a/(10i6))') 'col', col
!!$ WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr
!
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
DO i=ny,ny+kl
zsum(i) = zsum(i) + arr(i)
END DO
END DO
CALL putcol(mat, ny, zsum)
!
! The away operator
!
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!+++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!+++
SUBROUTINE ibcrhs3(rhs, ny)
!
! Apply BC on RHS
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, nz, k
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
nz = SIZE(rhs,2)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
DO k=1,nz
zsum = SUM(rhs(1:ny,k))
rhs(ny,k) = zsum
rhs(1:ny-1,k) = 0.0d0
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO k=1,nz
rhs(nrank-ny+1:nrank,k) = 0.0d0
END DO
END SUBROUTINE ibcrhs3
diff --git a/examples/mesh.f90 b/examples/mesh.f90
index aa102a9..a532f08 100644
--- a/examples/mesh.f90
+++ b/examples/mesh.f90
@@ -1,66 +1,66 @@
!>
!> @file mesh.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Equidistant and non-equidistant mesh
!
USE bsplines
IMPLICIT NONE
INTEGER :: i, nx
DOUBLE PRECISION :: coefs(5), dev, dx
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
!
INTERFACE
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, coefs
!
nx = 8 ! Number oh intevals in x
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev)
PRINT*, 'dev =', dev
!
dx = 1.0d0/REAL(nx,8)
xgrid = (/ (i*dx, i=0,nx) /)
PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev)
PRINT*, 'dev =', dev
!
DEALLOCATE(xgrid)
END PROGRAM main
diff --git a/examples/meshdist.f90 b/examples/meshdist.f90
index a20c2fd..f6e24f8 100644
--- a/examples/meshdist.f90
+++ b/examples/meshdist.f90
@@ -1,82 +1,82 @@
!>
!> @file meshdist.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
SUBROUTINE meshdist(c, x, nx)
!
! Construct a 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
diff --git a/examples/moments.f90 b/examples/moments.f90
index 0cb925b..ac4b874 100644
--- a/examples/moments.f90
+++ b/examples/moments.f90
@@ -1,228 +1,228 @@
!>
!> @file moments.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Compute moments of f(x), using its Spline representation
!
MODULE globals
USE bsplines
USE matrix
IMPLICIT NONE
DOUBLE PRECISION, PARAMETER :: pi = 3.14159265359d0
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), rhs(:), sol(:)
DOUBLE PRECISION, ALLOCATABLE :: finteg(:,:), moms(:)
TYPE(spline1d), SAVE :: splx
TYPE(gbmat), SAVE :: mat
CONTAINS
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Mass matrix
!
c(1) = 1.d0
idt(1) = 0
idw(1) = 0
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(spl, rhs)
!
! Assenbly the RHS using spline spl
!
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss)*rhseq(xgauss(igauss))
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(v)
DOUBLE PRECISION, INTENT(in) :: v
rhseq = SQRT(1.0d0/(2.0d0*pi)) * EXP(-0.5d0*v**2)
END FUNCTION rhseq
END SUBROUTINE disrhs
END MODULE globals
!
!================================================================================
PROGRAM main
USE bsplines
USE globals
IMPLICIT NONE
INTEGER :: nidbas, nx, nmoms, ngauss, rank, kl, ku
INTEGER :: i
DOUBLE PRECISION :: a, b, dx
!
! Input
!
WRITE(*,'(a)') 'Enter, nidbas, a, b, nx, nmoms'
READ(*,*) nidbas, a, b, nx, nmoms
!
! Equidistant mesh
!
ALLOCATE(xgrid(0:nx))
dx = (b-a)/REAL(nx)
DO i=0, nx
xgrid(i) = a + i*dx
END DO
WRITE(*,'(a/(8(1pe12.4)))') 'XGRID', xgrid
!
! Set up spline
!
ngauss = nidbas+1
CALL set_spline(nidbas, ngauss, xgrid, splx)
!
! Mass matrix
!
CALL get_dim(splx, rank) ! Rank of the FE Mass matrix
kl = nidbas
ku = kl
CALL init(kl, ku, rank, 1, mat)
WRITE(*,'(a,3i6)') 'kl, ku, rank', kl, ku, rank
CALL dismat(splx, mat)
!
! RHS
!
ALLOCATE(rhs(rank), sol(rank))
CALL disrhs(splx, rhs)
!
! Solve for Spline coefs
!
CALL factor(mat)
CALL bsolve(mat, rhs, sol)
WRITE(*,'(a/(8(1pe12.4)))') 'SOL', sol
WRITE(*,'(a,1pe20.12)') ' Integral of sol using FINTG', fintg(splx, sol)
!
! Moments
!
ALLOCATE(finteg(rank,0:nmoms), moms(0:nmoms))
CALL calc_integ(splx, finteg)
DO i=0,nmoms
moms(i) = DOT_PRODUCT(sol(:), finteg(:,i))
END DO
WRITE(*,'(a,i3)') 'Moments of orders from 0 to', nmoms
WRITE(*,'(8(1pe20.12))') moms
!
DEALLOCATE(finteg, moms)
DEALLOCATE(rhs, sol)
DEALLOCATE(xgrid)
CALL destroy(mat)
CALL destroy_sp(splx)
END PROGRAM main
diff --git a/examples/optim1.f90 b/examples/optim1.f90
index 8db612e..017aead 100644
--- a/examples/optim1.f90
+++ b/examples/optim1.f90
@@ -1,138 +1,138 @@
!>
!> @file optim1.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test and compare performance of using "spline" and
! "pp" forms. 1D case
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, nidbas, nrank, npt=1000000
INTEGER :: i
DOUBLE PRECISION :: dx
DOUBLE PRECISION :: seconds, t0, t1
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, coefs
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fpt0(:), fpt1(:), fun(:, :)
INTEGER, ALLOCATABLE :: left(:)
TYPE(spline1d) :: splx
!
NAMELIST /newrun/ nx, nidbas, npt
!===============================================================================
!
! 1D grid
!
nx = 10
nidbas = 3
npt = 1000000
READ(*,newrun)
WRITE(*,newrun)
ALLOCATE(xgrid(0:nx))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Set up spline
!
CALL set_spline(nidbas, 4, xgrid, splx)
nrank = splx%dim
WRITE(*,'(a, i5)') 'nrank =', nrank
WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
!
ALLOCATE(xpt(npt))
ALLOCATE(left(npt))
ALLOCATE(fun(0:nidbas,0:1)) ! Values and first derivatives of all Splines
CALL RANDOM_NUMBER(xpt)
CALL locintv(splx, xpt, left)
!===============================================================================
!
! Check def_basfun_opt
!
CALL basfun_recur(xpt(101), splx, fun, left(101)+1)
WRITE(*,'(/a,f20.15,i4/(2f20.15))') 'BASFUN_RECUR at X=', xpt(101), left(101),&
& (fun(:,i), i=0,1)
!
!!$ CALL def_basfun(xpt(101), splx, fun)
CALL basfun(xpt(101), splx, fun, left(101)+1)
WRITE(*,'(/a,f20.15/(2f20.15))') 'DEF_BASFUN at X=', xpt(101), &
& (fun(:,i), i=0,1)
!
! Performance of basis function computations
!
t0 = seconds()
DO i=1,npt
CALL basfun_recur(xpt(i), splx, fun, left(i)+1)
END DO
WRITE(*,'(/a,1pe12.3)') 'BASFUN_RECUR time (s)', (seconds()-t0)/REAL(npt)
!
t0 = seconds()
DO i=1,npt
!!$ CALL def_basfun(xpt(i), splx, fun)
CALL basfun(xpt(i), splx, fun, left(i)+1)
END DO
WRITE(*,'(/a,1pe12.3)') 'DEF_BASFUN time (s)', (seconds()-t0)/REAL(npt)
!===============================================================================
!
! Check and performance of GRIDVAL using PPFORM and SPLINE expansion
!
ALLOCATE(coefs(nrank))
DEALLOCATE(xpt)
ALLOCATE(xpt(npt), fpt0(npt), fpt1(npt))
CALL RANDOM_NUMBER(xpt)
!
splx%nlppform = .TRUE.
coefs = 1.0d0
!
CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs)
!
t0 = seconds()
CALL gridval(splx, xpt, fpt1, 1)
CALL gridval(splx, xpt, fpt0, 0)
t1 = seconds()-t0
WRITE(*,'(/a,2(1pe12.3))') 'PPFORM: Max errors', &
& MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
!
splx%nlppform = .FALSE.
coefs = 1.0d0
CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs)
t0 = seconds()
CALL gridval(splx, xpt, fpt1, 1)
CALL gridval(splx, xpt, fpt0, 0)
t1 = seconds()-t0
WRITE(*,'(/a,2(1pe12.3))') 'SPLINES: Max errors', &
& MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
!===============================================================================
!
! Clean up
!
CALL destroy_sp(splx)
DEALLOCATE(xgrid, coefs)
DEALLOCATE(xpt, fpt0, fpt1)
DEALLOCATE(fun)
END PROGRAM main
diff --git a/examples/optim2.f90 b/examples/optim2.f90
index ffb9773..bf18a77 100644
--- a/examples/optim2.f90
+++ b/examples/optim2.f90
@@ -1,119 +1,119 @@
!>
!> @file optim2.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test and compare performance of using "spline" and
! "pp" forms. 2D case
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, ny, ngauss(2), nidbas(2)
INTEGER :: npt, d1, d2
INTEGER :: i, j
DOUBLE PRECISION :: pi, dx, dy
DOUBLE PRECISION :: seconds, t0, t1
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:)
TYPE(spline2d) :: splxy
!
NAMELIST /newrun/ nx, ny, nidbas, npt
!===============================================================================
!
! 2D grid
!
nx = 8
ny = 8
nidbas = (/ 3, 3 /)
npt = 1000000
READ(*,newrun)
WRITE(*,newrun)
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
dy = 2.0d0*pi/REAL(ny)
ygrid = (/ (j*dy,j=0,ny) /)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!
! Set up spline
!
ngauss = 4
CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
d1 = splxy%sp1%dim
d2 = splxy%sp2%dim
WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!===============================================================================
!
! Check and performance of GRIDVAL using PPFORM and SPLINE expansion
!
ALLOCATE(xpt(npt), ypt(npt))
CALL RANDOM_NUMBER(xpt)
CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
!
ALLOCATE(coefs(d1,d2))
ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt))
!
coefs = 1.0d0
!
splxy%sp1%nlppform = .TRUE.
splxy%sp2%nlppform = .TRUE.
CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs)
!
t0 = seconds()
CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/))
CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/))
CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/))
t1 = seconds()-t0
WRITE(*,'(/a,3(1pe12.3))') 'PPFORM: Max errors', &
& MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
!
splxy%sp1%nlppform = .FALSE.
splxy%sp2%nlppform = .FALSE.
CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs)
t0 = seconds()
CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/))
CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/))
CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/))
t1 = seconds()-t0
WRITE(*,'(/a,3(1pe12.3))') 'SPLINES: Max errors', &
& MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
!===============================================================================
!
! Clean up
!
CALL destroy_sp(splxy)
DEALLOCATE(xgrid, ygrid, coefs)
DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10)
END PROGRAM main
diff --git a/examples/optim3.f90 b/examples/optim3.f90
index 9a617a0..5c50aba 100644
--- a/examples/optim3.f90
+++ b/examples/optim3.f90
@@ -1,137 +1,137 @@
!>
!> @file optim3.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test and compare performance of using "spline" and
! "pp" forms. 2D1D case
!
USE bsplines
!
IMPLICIT NONE
INTEGER :: nx, ny, nz, ngauss(3), nidbas(3)
INTEGER :: npt, d1, d2, d3
INTEGER :: i, j, k
DOUBLE PRECISION :: pi, dx, dy, dz
DOUBLE PRECISION :: seconds, t0, t1
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid
DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: coefs
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), zpt(:)
DOUBLE PRECISION, ALLOCATABLE :: fpt000(:), fpt100(:), fpt010(:), fpt001(:)
TYPE(spline2d1d) :: splxyz
!
NAMELIST /newrun/ nx, ny, nz, nidbas, npt
!===============================================================================
!
! 2D grid
!
nx = 8
ny = 8
nz = 8
nidbas = (/ 3, 3, 3 /)
npt = 1000000
READ(*,newrun)
WRITE(*,newrun)
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
dy = 2.0d0*pi/REAL(ny)
ygrid = (/ (j*dy,j=0,ny) /)
dz = 2.0d0*pi/REAL(nz)
zgrid = (/ (k*dz,k=0,nz) /)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
!
! Set up spline
!
ngauss = 4
CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, &
& (/.FALSE., .TRUE., .TRUE./))
d1 = splxyz%sp12%sp1%dim
d2 = splxyz%sp12%sp2%dim
d3 = splxyz%sp3%dim
WRITE(*,'(a,3i4)') 'd1, d2, d3 =', d1, d2, d3
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxyz%sp12%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxyz%sp12%sp2%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots
!===============================================================================
!
! Check and performance of GRIDVAL using PPFORM and SPLINE expansion
!
ALLOCATE(xpt(npt), ypt(npt), zpt(npt))
CALL RANDOM_NUMBER(xpt)
CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
CALL RANDOM_NUMBER(zpt); zpt = 2.0d0*pi*zpt
!
ALLOCATE(coefs(d1,d2,d3))
ALLOCATE(fpt000(npt), fpt100(npt), fpt010(npt), fpt001(npt))
!
coefs = 1.0d0
!
splxyz%sp12%sp1%nlppform = .TRUE.
splxyz%sp12%sp2%nlppform = .TRUE.
splxyz%sp3%nlppform = .TRUE.
CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs)
t0 = seconds()
CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/))
CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/))
CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/))
CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/))
t1 = seconds()-t0
WRITE(*,'(/a,4(1pe12.3))') 'PPFORM: Max errors', &
& MAXVAL(ABS(fpt000-1.0d0)), &
& MAXVAL(ABS(fpt100)), &
& MAXVAL(ABS(fpt010)), &
& MAXVAL(ABS(fpt001))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
!
splxyz%sp12%sp1%nlppform = .FALSE.
splxyz%sp12%sp2%nlppform = .FALSE.
splxyz%sp3%nlppform = .FALSE.
CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs)
t0 = seconds()
CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/))
CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/))
CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/))
CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/))
t1 = seconds()-t0
WRITE(*,'(/a,4(1pe12.3))') 'SPLINES: Max errors', &
& MAXVAL(ABS(fpt000-1.0d0)), &
& MAXVAL(ABS(fpt100)), &
& MAXVAL(ABS(fpt010)), &
& MAXVAL(ABS(fpt001))
WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
!===============================================================================
!
! Clean up
!
CALL destroy_sp(splxyz)
DEALLOCATE(xgrid, ygrid, zgrid, coefs)
DEALLOCATE(xpt, ypt, fpt000,fpt100, fpt010, fpt001)
END PROGRAM main
diff --git a/examples/pde1d.f90 b/examples/pde1d.f90
index 86584b8..74fd2ea 100644
--- a/examples/pde1d.f90
+++ b/examples/pde1d.f90
@@ -1,419 +1,419 @@
!>
!> @file pde1d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
! exact solution: f(r) = 1 - r^k
!
USE bsplines
USE matrix
USE futils
USE conmat_mod
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank, kl, ku
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
TYPE(spline1d) :: splx
TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde1d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: solcal, solana, errsol
DOUBLE PRECISION :: seconds, t0, tmat, tfact, tsolv
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
CALL attach(fid, '/', 'KDIFF', kdiff)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
kl = nidbas
ku = kl
CALL init(kl, ku, nrank, 1, mat)
WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
!!$ CALL dismat(splx, mat)
CALL conmat(splx, mat, coefeq)
!
ALLOCATE(arr(nrank))
!!$ WRITE(*,'(/a)') 'Matrice before BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr, SUM(arr)
!!$ END DO
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(kdiff, splx, rhs)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
!
! Set BC f(r=1) = 0 on matrix
!
arr(1:nrank-1) = 0.0d0
arr(nrank) = 1.0d0
CALL putrow(mat, nrank, arr)
CALL putcol(mat, nrank, arr)
tmat = seconds() - t0
!!$ WRITE(*,'(/a)') 'Matrice after BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr
!!$ END DO
!
! Set BC f(r=1) = 0 on RHS
!
rhs(nrank) = 0.0d0
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS after BC', rhs
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
CALL putarr(fid, '/MAT', mat%val, 'GB matrice with BC')
CALL attach(fid, '/MAT', 'KL', mat%kl)
CALL attach(fid, '/MAT', 'KU', mat%ku)
CALL attach(fid, '/MAT', 'RANK', mat%rank)
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
t0 = seconds()
CALL bsolve(mat, rhs, sol)
tsolv = seconds() - t0
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'SOL', sol
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splx, sol), &
& fintg(splx, sol)-REAL(kdiff,8)/REAL(kdiff+1,8)
!===========================================================================
! 4.0 Check the solution and its 1st derivative
!
ALLOCATE(solcal(0:nx,0:2), solana(0:nx,0:2), errsol(0:nx,0:2))
DO i =0,nx
solana(i,0) = 1.0d0-xgrid(i)**kdiff
solana(i,1) = -kdiff*xgrid(i)**(kdiff-1)
solana(i,2) = -kdiff*(kdiff-1)*xgrid(i)**(kdiff-2)
END DO
CALL gridval(splx, xgrid, solcal(:,0), 0, sol) ! Compute PPFORM and grid values
CALL gridval(splx, xgrid, solcal(:,1), 1) ! 1st derivative
CALL gridval(splx, xgrid, solcal(:,2), 2) ! 2nd derivative
errsol = solana - solcal
!
CALL putarr(fid, '/XGRID', xgrid)
CALL putarr(fid, '/SOLCAL', solcal)
CALL putarr(fid, '/SOLANA', solana)
CALL putarr(fid, '/ERR', errsol)
!
CALL creatg(fid, '/spline')
CALL attach(fid, '/spline', 'order', splx%order)
CALL putarr(fid, '/spline/knots', splx%knots, 'Spline knots')
WRITE(*,'(a,3(1pe12.3))') 'Rel. discretization errors (solution and derivatives).', &
& (SQRT( DOT_PRODUCT(errsol(:,i),errsol(:,i)) / &
& DOT_PRODUCT(solana(:,i),solana(:,i)) ), i=0,2)
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(arr)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig.f90 b/examples/pde1d_eig.f90
index 698efea..4105b1b 100644
--- a/examples/pde1d_eig.f90
+++ b/examples/pde1d_eig.f90
@@ -1,459 +1,459 @@
!>
!> @file pde1d_eig.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
! exact solution: f(r) = 1 - r^k
!
USE bsplines
USE matrix
USE futils
USE conmat_mod
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank, kl, ku
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
TYPE(spline1d) :: splx
TYPE(gemat) :: mat
!!$ TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde1d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
!
DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
INTEGER :: j, info
!
INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
& lworkl=maxncv*(maxncv+8), zero=0.0d0
DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
& d(maxncv,2), resid(maxn), w(maxn), &
& tol=0.0d0, sigma
DOUBLE PRECISION, EXTERNAL :: dnrm2
INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
& mode1=1, ierr
INTEGER :: ido, ipntr(11), iparam(11)
CHARACTER(len=1) :: bmat='I'
CHARACTER(len=2) :: which='SA'
LOGICAL :: rvec, select(maxncv)
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
CALL attach(fid, '/', 'KDIFF', kdiff)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
kl = nidbas
ku = kl
!!$ CALL init(kl, ku, nrank, 1, mat)
WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
CALL init(nrank, 1, mat)
!!$ CALL dismat(splx, mat)
CALL conmat(splx, mat, coefeq)
!
ALLOCATE(arr(nrank))
!!$ WRITE(*,'(/a)') 'Matrice before BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr, SUM(arr)
!!$ END DO
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(kdiff, splx, rhs)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
!
! Set BC f(r=1) = 0 on matrix
!
arr(1:nrank-1) = 0.0d0
arr(nrank) = 1.0d0
CALL putrow(mat, nrank, arr)
CALL putcol(mat, nrank, arr)
tmat = seconds() - t0
!
!===========================================================================
! 3.0 Eigevalue problem
!
! Using Lapack dsyev
!
t0 = seconds()
ALLOCATE(mata(nrank,nrank))
ALLOCATE(eigvals(nrank))
ALLOCATE(work(3*nrank))
mata=0.0d0
DO j=1,nrank
mata(j:nrank,j) = mat%val(j:nrank,j)
END DO
CALL putarr(fid, '/MAT', mata, 'matrix A')
CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
teig = seconds() - t0
PRINT*,'Info from DSYEV', info, arr(1)
IF(info.EQ.0) THEN
CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
END IF
!
! Using Arpack
!
ido = 0
iparam(1) = ishfts
iparam(3) = maxitr
iparam(7) = mode1
!
IF(nev.GT.0) THEN
t0 = seconds()
DO ! ARPACK reverse communication loop
CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
& ncv, v, maxn, iparam, ipntr, workd, workl,&
& lworkl, info)
IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
!!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr
CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
CYCLE
END IF
IF(info .LT. 0) THEN
PRINT*, 'Error in _saupd with info =', info
ELSE
rvec = .TRUE.
CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
& bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
& iparam, ipntr, workd, workl, lworkl, ierr )
IF( ierr .NE. 0 ) THEN
PRINT*,'Error in _seupd with ierr =', ierr
ELSE
nconv = iparam(5)
PRINT*, '--- eigenvalues and error ---'
DO j=1,nconv ! Residual norms
!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue
!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
!!$ d(j,2) = dnrm2(nrank, w, 1)
!!$ d(j,2) = d(j,2)/abs(d(j,1))
WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
END DO
EXIT
END IF
END IF
END DO ! End of ARPACK reverse communication loop
tarpack = seconds()-t0
END IF
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(arr)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE av(n,v,w)
!
! Matrix vector product: w <- Av
INTEGER, INTENT(in) :: n
DOUBLE PRECISION, INTENT(in) :: v(*)
DOUBLE PRECISION, INTENT(out) :: w(*)
w(1:n) = vmx(mat,v(1:n))
END SUBROUTINE av
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_csr.f90 b/examples/pde1d_eig_csr.f90
index 02996ca..d8e4067 100644
--- a/examples/pde1d_eig_csr.f90
+++ b/examples/pde1d_eig_csr.f90
@@ -1,469 +1,469 @@
!>
!> @file pde1d_eig_csr.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
! exact solution: f(r) = 1 - r^k
!
USE bsplines
USE csr
USE futils
USE conmat_mod
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank, kl, ku
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
TYPE(spline1d) :: splx
TYPE(csr_mat) :: mat
!!$ TYPE(gemat) :: mat
!!$ TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde1d.h5'
INTEGER :: fid, ffid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
!
DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
INTEGER :: j, info
!
INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
& lworkl=maxncv*(maxncv+8), zero=0.0d0
DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
& d(maxncv,2), resid(maxn), w(maxn), &
& tol=0.0d0, sigma
DOUBLE PRECISION, EXTERNAL :: dnrm2
INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
& mode1=1, ierr
INTEGER :: ido, ipntr(11), iparam(11)
CHARACTER(len=1) :: bmat='I'
CHARACTER(len=2) :: which='SA'
LOGICAL :: rvec, select(maxncv)
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
CALL attach(fid, '/', 'KDIFF', kdiff)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
kl = nidbas
ku = kl
WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
CALL init(nrank, 1, mat)
!!$ CALL dismat(splx, mat)
CALL conmat(splx, mat, coefeq)
CALL to_mat(mat)
CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File')
PRINT*, 'rank', mat%rank
PRINT*, 'nnz', mat%nnz
PRINT*, 'irow', mat%irow
PRINT*, 'cols', mat%cols
CALL putmat(ffid,'/MAT',mat,'FE matrix')
CALL closef(ffid)
!
!
ALLOCATE(arr(nrank))
!!$ WRITE(*,'(/a)') 'Matrice before BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr, SUM(arr)
!!$ END DO
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(kdiff, splx, rhs)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
!!$!
!!$! Set BC f(r=1) = 0 on matrix
!!$!
!!$ arr(1:nrank-1) = 0.0d0
!!$ arr(nrank) = 1.0d0
!!$ CALL putrow(mat, nrank, arr)
!!$ CALL putcol(mat, nrank, arr)
CALL putmat(fid,'/MATA', mat, 'FE matrix')
tmat = seconds() - t0
!
!===========================================================================
! 3.0 Eigevalue problem
!
! Using Lapack dsyev
!
t0 = seconds()
ALLOCATE(mata(nrank,nrank))
ALLOCATE(eigvals(nrank))
ALLOCATE(work(3*nrank))
mata=0.0d0
DO j=1,nrank
CALL getcol(mat, j, mata(:,j)) ! mata is a dense matrix
END DO
CALL putarr(fid, '/MAT', mata, 'matrix A')
CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
teig = seconds() - t0
PRINT*,'Info from DSYEV', info, arr(1)
IF(info.EQ.0) THEN
CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
END IF
!
! Using Arpack
!
ido = 0
iparam(1) = ishfts
iparam(3) = maxitr
iparam(7) = mode1
!
IF(nev.GT.0) THEN
t0 = seconds()
DO ! ARPACK reverse communication loop
CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
& ncv, v, maxn, iparam, ipntr, workd, workl,&
& lworkl, info)
IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
!!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr
CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
CYCLE
END IF
IF(info .LT. 0) THEN
PRINT*, 'Error in _saupd with info =', info
ELSE
rvec = .TRUE.
CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
& bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
& iparam, ipntr, workd, workl, lworkl, ierr )
IF( ierr .NE. 0 ) THEN
PRINT*,'Error in _seupd with ierr =', ierr
ELSE
nconv = iparam(5)
PRINT*, '--- eigenvalues and error ---'
DO j=1,nconv ! Residual norms
!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue
!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
!!$ d(j,2) = dnrm2(nrank, w, 1)
!!$ d(j,2) = d(j,2)/abs(d(j,1))
WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
END DO
EXIT
END IF
END IF
END DO ! End of ARPACK reverse communication loop
tarpack = seconds()-t0
END IF
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(arr)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE av(n,v,w)
!
! Matrix vector product: w <- Av
INTEGER, INTENT(in) :: n
DOUBLE PRECISION, INTENT(in) :: v(*)
DOUBLE PRECISION, INTENT(out) :: w(*)
w(1:n) = vmx(mat,v(1:n))
END SUBROUTINE av
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_gb.f90 b/examples/pde1d_eig_gb.f90
index c80f22d..4b1224f 100644
--- a/examples/pde1d_eig_gb.f90
+++ b/examples/pde1d_eig_gb.f90
@@ -1,460 +1,460 @@
!>
!> @file pde1d_eig_gb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
! exact solution: f(r) = 1 - r^k
!
USE bsplines
USE matrix
USE futils
USE conmat_mod
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank, kl, ku
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
TYPE(spline1d) :: splx
TYPE(gemat) :: mat
!!$ TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde1d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
!
DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
INTEGER :: j, info
!
INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
& lworkl=maxncv*(maxncv+8), zero=0.0d0
DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
& d(maxncv,2), resid(maxn), w(maxn), &
& tol=0.0d0, sigma
DOUBLE PRECISION, EXTERNAL :: dnrm2
INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
& mode1=1, ierr
INTEGER :: ido, ipntr(11), iparam(11)
CHARACTER(len=1) :: bmat='I'
CHARACTER(len=2) :: which='SA'
LOGICAL :: rvec, select(maxncv)
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
CALL attach(fid, '/', 'KDIFF', kdiff)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
kl = nidbas
ku = kl
!!$ CALL init(kl, ku, nrank, 1, mat)
!!$ WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
CALL init(nrank, 1, mat)
!!$ CALL dismat(splx, mat)
CALL conmat(splx, mat, coefeq)
!
ALLOCATE(arr(nrank))
!!$ WRITE(*,'(/a)') 'Matrice before BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr, SUM(arr)
!!$ END DO
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(kdiff, splx, rhs)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
!
! Set BC f(r=1) = 0 on matrix
!
arr(1:nrank-1) = 0.0d0
arr(nrank) = 1.0d0
CALL putrow(mat, nrank, arr)
CALL putcol(mat, nrank, arr)
tmat = seconds() - t0
!
!===========================================================================
! 3.0 Eigevalue problem
!
! Using Lapack dsyev
!
t0 = seconds()
ALLOCATE(mata(nrank,nrank))
ALLOCATE(eigvals(nrank))
ALLOCATE(work(3*nrank))
mata=0.0d0
DO j=1,nrank
mata(j:nrank,j) = mat%val(j:nrank,j)
END DO
CALL putarr(fid, '/MAT', mata, 'matrix A')
CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
teig = seconds() - t0
PRINT*,'Info from DSYEV', info, arr(1)
IF(info.EQ.0) THEN
CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
END IF
!
! Using Arpack
!
ido = 0
iparam(1) = ishfts
iparam(3) = maxitr
iparam(7) = mode1
!
IF(nev.GT.0) THEN
t0 = seconds()
DO ! ARPACK reverse communication loop
CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
& ncv, v, maxn, iparam, ipntr, workd, workl,&
& lworkl, info)
IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
PRINT*, 'Error in _saupd with info =', info
WRITE(*,'(a/(10i4))') 'ipntr', ipntr
CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
CYCLE
END IF
IF(info .LT. 0) THEN
PRINT*, 'Error in _saupd with info =', info
ELSE
rvec = .TRUE.
CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
& bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
& iparam, ipntr, workd, workl, lworkl, ierr )
IF( ierr .NE. 0 ) THEN
PRINT*,'Error in _seupd with ierr =', ierr
ELSE
nconv = iparam(5)
PRINT*, '--- eigenvalues and error ---'
DO j=1,nconv ! Residual norms
!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue
!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
!!$ d(j,2) = dnrm2(nrank, w, 1)
!!$ d(j,2) = d(j,2)/abs(d(j,1))
WRITE(*,'(i3,2(1pe12.4))') j, d(j,1), eigvals(j)-d(j,1)
!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
END DO
EXIT
END IF
END IF
END DO ! End of ARPACK reverse communication loop
tarpack = seconds()-t0
END IF
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(arr)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE av(n,v,w)
!
! Matrix vector product: w <- Av
INTEGER, INTENT(in) :: n
DOUBLE PRECISION, INTENT(in) :: v(*)
DOUBLE PRECISION, INTENT(out) :: w(*)
w(1:n) = vmx(mat,v(1:n))
END SUBROUTINE av
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_ge.f90 b/examples/pde1d_eig_ge.f90
index 713a66e..a36f688 100644
--- a/examples/pde1d_eig_ge.f90
+++ b/examples/pde1d_eig_ge.f90
@@ -1,474 +1,474 @@
!>
!> @file pde1d_eig_ge.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
! exact solution: f(r) = 1 - r^k
!
USE bsplines
USE matrix
USE futils
USE conmat_mod
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank, kl, ku
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
TYPE(spline1d) :: splx
TYPE(gemat) :: mat
!
CHARACTER(len=128) :: file='pde1d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
!
DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvecsr(:,:), eigvecsl(:,:), &
& wr(:), wi(:), work(:)
INTEGER :: j, info
!
INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=maxn
DOUBLE PRECISION :: v(maxn,maxncv),workd(3*maxn), workev(3*maxncv), &
& d(maxncv,2), resid(maxn), w(maxncv,maxn), &
& zero=0.0d0, tol=0.0d0, sigmar, sigmai
DOUBLE PRECISION, ALLOCATABLE :: workl(:)
DOUBLE PRECISION, EXTERNAL :: dnrm2
INTEGER :: nev=10, ncv=30, ishfts=1, maxitr=300, nconv, &
& mode1=1, ierr, lworkl
INTEGER :: ido, ipntr(11), iparam(11)
CHARACTER(len=1) :: bmat='I'
CHARACTER(len=2) :: which='SA'
LOGICAL :: rvec, select(maxncv)
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, &
& nev, ncv, which
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
CALL attach(fid, '/', 'KDIFF', kdiff)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
kl = nidbas
ku = kl
!!$ CALL init(kl, ku, nrank, 1, mat)
WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
CALL init(nrank, 1, mat)
!!$ CALL dismat(splx, mat)
CALL conmat(splx, mat, coefeq)
!
ALLOCATE(arr(nrank))
!!$ WRITE(*,'(/a)') 'Matrice before BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr, SUM(arr)
!!$ END DO
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(kdiff, splx, rhs)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
!
! Set BC f(r=1) = 0 on matrix
!
arr(1:nrank-1) = 0.0d0
arr(nrank) = 1.0d0
CALL putrow(mat, nrank, arr)
CALL putcol(mat, nrank, arr)
tmat = seconds() - t0
!
!===========================================================================
! 3.0 Eigevalue problem
!
! Using Lapack dgeev
!
t0 = seconds()
ALLOCATE(mata(nrank,nrank))
ALLOCATE(eigvecsr(nrank,nrank))
ALLOCATE(eigvecsl(nrank,nrank))
ALLOCATE(work(4*nrank))
ALLOCATE(wr(nrank), wi(nrank))
mata(:,:) = mat%val(:,:)
CALL putarr(fid, '/MAT', mata, 'matrix A')
!!$ CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
CALL dgeev('N', 'V', nrank, mata, nrank, wr, wi, eigvecsl, SIZE(eigvecsl,1), &
& eigvecsr, SIZE(eigvecsr,1), work, SIZE(work), info)
teig = seconds() - t0
PRINT*,'Info from DGEEV', info, arr(1)
IF(info.EQ.0) THEN
CALL putarr(fid, '/REIGVS', wr, 'Real of eigenvalues of A')
CALL putarr(fid, '/IEIGVS', wi, 'Imag of eigenvalues of A')
CALL putarr(fid, '/EIGVECL', eigvecsl, 'left eigenvalues of A')
CALL putarr(fid, '/EIGVECR', eigvecsr, 'right eigenvalues of A')
WRITE(*,'(a/(10f10.4))') 'Real eigval', wr
WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi
END IF
!
! Using Arpack
!
ido = 0
iparam(1) = ishfts
iparam(3) = maxitr
iparam(7) = mode1
!
lworkl = 3*ncv**2+6*ncv
ALLOCATE(workl(lworkl))
!
t0 = seconds()
DO ! ARPACK reverse communication loop
CALL dnaupd(ido, bmat, nrank, which, nev, tol, resid, &
& ncv, v, maxn, iparam, ipntr, workd, workl,&
& lworkl, info)
IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
CYCLE
END IF
PRINT*, 'INFO =', info
IF(info .LT. 0) THEN
PRINT*, 'Error in dnaupd with info =', info
ELSE
rvec = .TRUE.
CALL dneupd(rvec, 'A', select, d, d(1,2), v, size(v,1), &
& sigmar, sigmai, workev, bmat, nrank, which, nev, tol, &
& resid, ncv, v, size(v,1), iparam, ipntr, workd, workl, &
& lworkl, ierr )
IF( ierr .NE. 0 ) THEN
PRINT*,'Error in dneupd with ierr =', ierr
ELSE
nconv = iparam(5)
PRINT*, '--- Real eigenvalues and comprison with Lapack results ---'
! eiegvalues and diff with Lapack results
DO j=1,nconv
WRITE(*,'(i3,3(1pe12.4))') j, d(j,1), wr(j), wr(j)-d(j,1)
END DO
PRINT*, '--- Imag eigenvalues and comprison with Lapack results ---'
DO j=1,nconv
WRITE(*,'(i3,3(1pe12.4))') j, d(j,2), wi(j), wi(j)-d(j,2)
END DO
!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue
!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
!!$ d(j,2) = dnrm2(nrank, w, 1)
!!$ d(j,2) = d(j,2)/abs(d(j,1))
CALL putarr(fid, '/EIGVS', d(1:nconv,:), 'ARPACK eigenvalues of A')
!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
!!$ END DO
END IF
EXIT
END IF
END DO ! End of ARPACK reverse communication loop
tarpack = seconds()-t0
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(arr)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE av(n,v,w)
!
! Matrix vector product: w <- Av
INTEGER, INTENT(in) :: n
DOUBLE PRECISION, INTENT(in) :: v(*)
DOUBLE PRECISION, INTENT(out) :: w(*)
w(1:n) = vmx(mat,v(1:n))
END SUBROUTINE av
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_zcsr.f90 b/examples/pde1d_eig_zcsr.f90
index d467456..eaaebe4 100644
--- a/examples/pde1d_eig_zcsr.f90
+++ b/examples/pde1d_eig_zcsr.f90
@@ -1,481 +1,481 @@
!>
!> @file pde1d_eig_zcsr.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
! exact solution: f(r) = 1 - r^k
!
USE bsplines
USE csr
USE futils
USE f95_precision, ONLY: WP => DP
USE lapack95, ONLY: geev
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank, kl, ku
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
TYPE(spline1d) :: splx
TYPE(zcsr_mat) :: mat
!
CHARACTER(len=128) :: file='pde1d.h5'
INTEGER :: fid, ffid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
!
! Lapack95 GEEV arguments
COMPLEX(WP), ALLOCATABLE :: mata(:,:), w(:)
REAL(WP), ALLOCATABLE :: WR(:), WI(:)
INTEGER :: j, info
!
INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25
INTEGER :: lworkl, zero=0.0d0
DOUBLE PRECISION :: d(maxncv,2), tol=0.0d0, rwork(maxncv)
DOUBLE COMPLEX :: v(maxn,maxncv), resid(maxn), sigma
DOUBLE COMPLEX :: workd(3*maxncv), lwork(2*maxn)
DOUBLE COMPLEX, ALLOCATABLE :: workl(:), vl(:,:), vr(:,:)
DOUBLE PRECISION, EXTERNAL :: dnrm2
INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
& mode1=1, ierr
INTEGER :: ido, ipntr(14), iparam(11)
CHARACTER(len=1) :: bmat='I'
CHARACTER(len=2) :: which='SA'
LOGICAL :: rvec, select(maxncv)
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE csr
TYPE(spline1d), INTENT(in) :: spl
TYPE(zcsr_mat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1D Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
CALL attach(fid, '/', 'KDIFF', kdiff)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
kl = nidbas
ku = kl
WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
CALL init(nrank, 1, mat)
CALL dismat(splx, mat)
!!$ CALL conmat(splx, mat, coefeq)
CALL to_mat(mat)
PRINT*,'MAT after to_mat', mat%val
CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File', real_prec='D')
PRINT*, 'rank', mat%rank
PRINT*, 'nnz', mat%nnz
PRINT*, 'irow', mat%irow
PRINT*, 'cols', mat%cols
CALL putmat(ffid,'/MAT',mat,'FE matrix')
PRINT*, 'MAT',mat%val
CALL closef(ffid)
STOP
!
!
ALLOCATE(arr(nrank))
!!$ WRITE(*,'(/a)') 'Matrice before BC'
!!$ DO i=1,nrank
!!$ CALL getrow(mat, i, arr)
!!$ WRITE(*,'(12f8.3)') arr, SUM(arr)
!!$ END DO
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(kdiff, splx, rhs)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
!!$!
!!$! Set BC f(r=1) = 0 on matrix
!!$!
!!$ arr(1:nrank-1) = 0.0d0
!!$ arr(nrank) = 1.0d0
!!$ CALL putrow(mat, nrank, arr)
!!$ CALL putcol(mat, nrank, arr)
CALL putmat(fid,'/MATA', mat, 'FE matrix')
tmat = seconds() - t0
!
!===========================================================================
! 3.0 Eigevalue problem
!
! Using Lapack dsyev
!
t0 = seconds()
ALLOCATE(mata(nrank,nrank))
ALLOCATE(w(nrank))
ALLOCATE(wr(nrank), wi(nrank))
ALLOCATE(vl(nrank,nrank), vr(nrank,nrank))
mata=0.0d0
DO j=1,nrank
CALL getcol(mat, j, mata(:,j)) ! convert to dense matrix mata
END DO
CALL putarr(fid, '/MAT', mata, 'matrix A')
CALL geev(mata, w)
wr(:) = REAL(w(:))
wi(:) = AIMAG(w(:))
teig = seconds() - t0
PRINT*,'Info from ZGEEV', info, arr(1)
IF(info.EQ.0) THEN
CALL putarr(fid, '/REIGVS', wr, 'eigenvalues of A')
CALL putarr(fid, '/IEIGVS', wi, 'eigenvectors of A')
WRITE(*,'(a/(10f10.4))') 'Real eigval', wr
WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi
END IF
!
! Using Arpack
!
ido = 0
iparam(1) = ishfts
iparam(3) = maxitr
iparam(7) = mode1
!
lworkl = 3*ncv**2+5*ncv
ALLOCATE(workl(lworkl))
!
IF(nev.GT.0) THEN
t0 = seconds()
DO ! ARPACK reverse communication loop
CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, ncv, &
& v, SIZE(v,1), iparam, ipntr, workd, workl, lworkl, &
& rwork, info)
IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
!!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr
CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
CYCLE
END IF
IF(info .LT. 0) THEN
PRINT*, 'Error in _saupd with info =', info
ELSE
rvec = .TRUE.
CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
& bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
& iparam, ipntr, workd, workl, lworkl, ierr )
IF( ierr .NE. 0 ) THEN
PRINT*,'Error in _seupd with ierr =', ierr
ELSE
nconv = iparam(5)
PRINT*, '--- eigenvalues and error ---'
DO j=1,nconv ! Residual norms
!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue
!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
!!$ d(j,2) = dnrm2(nrank, w, 1)
!!$ d(j,2) = d(j,2)/abs(d(j,1))
WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
END DO
EXIT
END IF
END IF
END DO ! End of ARPACK reverse communication loop
tarpack = seconds()-t0
END IF
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(arr)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE av(n,v,w)
!
! Matrix vector product: w <- Av
INTEGER, INTENT(in) :: n
DOUBLE COMPLEX, INTENT(in) :: v(*)
DOUBLE COMPLEX, INTENT(out) :: w(*)
w(1:n) = vmx(mat,v(1:n))
END SUBROUTINE av
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE csr
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(zcsr_mat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_zmumps.f90 b/examples/pde1d_eig_zmumps.f90
index ec17841..644f680 100644
--- a/examples/pde1d_eig_zmumps.f90
+++ b/examples/pde1d_eig_zmumps.f90
@@ -1,460 +1,460 @@
!>
!> @file pde1d_eig_zmumps.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 1d differential eqation using splines:
!
! Solve the standard eigenvalue:
! A*x = \lambda *x or inv(A)*x = 1/\lambda * x using Arpack and MUMPS.
! where A is obtained from discretozation of
! -d/dr[r d/dr] f = k^2 r^(k-1)
!
USE bsplines
USE mumps_bsplines
USE futils
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, kdiff
INTEGER :: i, nrank
LOGICAL :: nlppform
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
TYPE(spline1d) :: splx
TYPE(zmumps_mat) :: mat
!
INTEGER :: ierr
INTEGER :: fid
CHARACTER(len=32) :: str
DOUBLE PRECISION :: seconds, t0, tmat, tfac, tarpack
!
! Arpack: Solve the standard eigenvalue problem
!
INTEGER :: nev = 10, ncv = 10
LOGICAL :: nlinv = .FALSE. ! Solve inv(A) = 1/\lambda * x if nlinv=.TRUE.
CHARACTER(len=2) :: which='SM'
INTEGER :: info=0 ! Use random vector to start the Arnoldi iterations
INTEGER :: ido=0 ! Reverse communications
LOGICAL :: rvec
LOGICAL, ALLOCATABLE :: select(:)
INTEGER :: iparam(11), ipntr(14), nconv
DOUBLE PRECISION :: tol=0.0d0
CHARACTER(len=1) :: bmat='I'
!
INTEGER :: lworkl
DOUBLE COMPLEX, ALLOCATABLE :: workl(:), workd(:), workev(:)
DOUBLE COMPLEX, ALLOCATABLE :: eig_vals(:), eig_vecs(:,:), resid(:)
DOUBLE COMPLEX :: sigma
DOUBLE PRECISION, ALLOCATABLE :: rwork(:)
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE mumps_bsplines
TYPE(spline1d), INTENT(in) :: spl
TYPE(zmumps_mat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(kdiff, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, &
& nev, ncv, nlinv, which, tol
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
kdiff = 2 ! Exponent of differential problem
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
CALL get_dim(splx, nrank) ! Rank of the FE matrix
WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
!
! FE matrix assembly
!
WRITE(*,'(a,i6)') ' nrank', nrank
CALL init(nrank, 1, mat)
CALL dismat(splx, mat)
CALL to_mat(mat)
tmat = seconds() - t0
!
mat%mumps_par%IRN => mat%mumps_par%IRN_loc ! Work around for single proc.
PRINT*, 'nnz_loc', mat%nnz_loc
PRINT*, 'mat%mumps_par%N', mat%mumps_par%N
PRINT*, 'mat%mumps_par%NZ_loc', mat%mumps_par%NZ_loc
PRINT*, 'size of mat%mumps_par%IRN', SIZE(mat%mumps_par%IRN)
PRINT*, 'mat%istart,mat%iend', mat%istart,mat%iend
!
CALL creatf('pde1d_eig_zmumps.h5', fid, 'PDE1D Result File', real_prec='d')
PRINT*, 'rank', mat%rank
PRINT*, 'nnz', mat%nnz
!
CALL putmat(fid,'/MAT',mat,'FE matrix')
!
IF(nlinv) THEN
t0 = seconds()
CALL factor(mat)
tfac = seconds()-t0
END IF
!===========================================================================
! 3.0 Solve the standard eigenvalue problem
!
lworkl = 3*ncv**2 + 5*ncv
ALLOCATE(workl(lworkl))
ALLOCATE(workd(3*nrank))
ALLOCATE(workev(2*ncv))
ALLOCATE(eig_vals(ncv), eig_vecs(nrank,ncv))
ALLOCATE(resid(nrank))
ALLOCATE(rwork(ncv))
!
iparam(1) = 1 ! shfts
iparam(3) = 300 ! Max. number of iterations
iparam(7) = 1 ! Regular mode
!
! The reverse communication loop
!
t0 = seconds()
DO
CALL znaupd (ido, bmat, nrank, which, nev, tol, resid, ncv, &
& eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl, &
& rwork, info )
!
IF(ido .EQ. -1 .OR. ido .EQ. 1) THEN ! Compute A*v
CALL av(nrank, workd(ipntr(1)), workd(ipntr(2)))
CYCLE
END IF
!
IF(info .LT. 0) THEN ! Error
PRINT*, 'Error in _naupd with info =', info
EXIT
ELSE
rvec = .TRUE.
ALLOCATE(select(ncv))
CALL zneupd (rvec, 'A', select, eig_vals, eig_vecs, nrank, &
& sigma, workev, bmat, nrank, which, nev, tol, resid, &
& ncv, eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl,&
& rwork, ierr)
IF(ierr .NE. 0) THEN
PRINT*, 'Error in _neupd with ierr =', ierr
EXIT
ELSE
nconv = iparam(5)
PRINT*,'Number of converged eigenvalues', nconv
IF(nlinv) THEN
eig_vals(1:nconv) = (1.d0,0.0d0) / eig_vals(1:nconv)
END IF
WRITE(*,'(2(1pe12.3))') eig_vals(1:nconv)
CALL putarr(fid, '/eig_vals', eig_vals(1:nconv))
CALL putarr(fid, '/eig_vecs', eig_vecs(1:nrank,1:nconv))
DO i=1,nconv
!!$ WRITE(*,'(/a,2(pe20.6))') '*** eigen value =', eig_vals(i)
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'Real of eigen vector', &
!!$ & REAL(eig_vecs(1:nrank,i))
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'Imag of eigen vector', &
!!$ & aimag(eig_vecs(1:nrank,i))
WRITE(str,'(a,i3.3)') '/eig_vecs_',i
CALL putarr(fid, TRIM(str), eig_vecs(1:nrank,i))
END DO
EXIT
END IF
END IF
END DO ! End of reverse commuinication loop
IF(info .EQ. 1) THEN
PRINT*, 'Maximum number of iterations reached!'
PRINT*, 'IPARAM(5) =', iparam(5)
END IF
PRINT*, 'Number of Arnoldi iterations', iparam(3)
tarpack = seconds() - t0
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
IF(nlinv) THEN
WRITE(*,'(a,1pe12.3)') 'Matrice factorization time (s) ', tfac
END IF
WRITE(*,'(a,1pe12.3)') 'Arpack time (s) ', tarpack
!
DEALLOCATE(xgrid)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CALL mpi_finalize(ierr)
!
CONTAINS
SUBROUTINE av(n,v,w)
!
INTEGER, INTENT(in) :: n
DOUBLE COMPLEX, INTENT(in) :: v(*)
DOUBLE COMPLEX, INTENT(out) :: w(*)
!
IF(nlinv) THEN
w(1:n) = v(1:n)
CALL bsolve(mat,w(1:n)) ! Solve A*w = v or w=inv(A)*v
ELSE
w(1:n) = vmx(mat,v(1:n)) ! A*v matrix product
END IF
END SUBROUTINE av
!
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix mat using spline spl
!
USE bsplines
USE mumps_bsplines
IMPLICIT NONE
TYPE(spline1d), INTENT(in) :: spl
TYPE(zmumps_mat), INTENT(inout) :: mat
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=i+iw; jcol=i+jt
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Weak form = Int(x*dw/dx*dt/dx)dx
!
c(1) = x
idt(1) = 1
idw(1) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!+++
SUBROUTINE disrhs(kdiff, spl, rhs)
!
! Assenbly the RHS using spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: kdiff
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, left
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, nrank, nx, nidbas)
!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
!
ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(1:nrank) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
left = i-1
!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x,k)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: k
rhseq = k*k*x**(k-1)
END FUNCTION rhseq
END SUBROUTINE disrhs
diff --git a/examples/pde1dp.f90 b/examples/pde1dp.f90
index 7eb32bd..36c93d4 100644
--- a/examples/pde1dp.f90
+++ b/examples/pde1dp.f90
@@ -1,170 +1,170 @@
!>
!> @file pde1dp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! 1D PDE with priodic BC
!
USE pde1dp_mod
USE bsplines
USE matrix
USE futils
USE conmat_mod
!
IMPLICIT NONE
CHARACTER(len=128) :: file='pde1dp.h5'
INTEGER :: fid
INTEGER :: nx, nidbas, ngauss, ibcoef
INTEGER :: nrank, kl, ku, dim
DOUBLE PRECISION :: coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, sol, rhs
INTEGER, PARAMETER :: npts=100
DOUBLE PRECISION, DIMENSION(0:npts-1) :: xpts, frhs
DOUBLE PRECISION :: dx, errmx
DOUBLE PRECISION, ALLOCATABLE :: arr(:,:)
TYPE(periodic_mat) :: mat
INTEGER :: i, j
!
NAMELIST /newrun/ nx, nidbas, ngauss, ibcoef, coefs
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
ibcoef = 1 ! Index of non-zero spline coef for RHS
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = 0.0d0
xgrid(nx) = 1.0d0
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1DP Result File')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'NGAUSS', ngauss)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up periodic spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
nrank = nx ! Rank of the FE matrix
!
! Mass matrix assembly
!
kl = nidbas
ku = kl
CALL init(kl, ku, nrank, 1, mat)
CALL get_dim(splx, dim)
WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim
!!$ CALL dismat(splx, mat)
CALL conmat(splx, mat, coefeq_mass)
!
! Store matrix in hdf5 file
!
ALLOCATE(arr(nrank,nrank))
DO j=1,nrank
CALL getcol(mat, j, arr(:,j))
END DO
CALL putarr(fid, '/mata', arr)
DEALLOCATE(arr)
!
! Check RHS constructed using input spline coefs.
!
ALLOCATE(bcoef(0:dim-1))
bcoef = 0.0d0; bcoef(ibcoef-1) = 1.0d0
!
DO i=nrank,dim-1 ! Periodicity to fill array of spline coefs
bcoef(i) = bcoef(MODULO(i,nrank))
END DO
WRITE(*,'(/a/(10f8.3))') 'bcoef from input', bcoef
dx = (xgrid(nx)-xgrid(0))/npts
DO i=0,npts-1
xpts(i) = xgrid(0) + i*dx
frhs(i) = rhseq(xpts(i))
END DO
CALL creatg(fid, '/rhs')
CALL putarr(fid,'/rhs/x', xpts)
CALL putarr(fid,'/rhs/f', frhs)
!
! Assembly RHS and check A*x = f, using method vmx
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(splx, rhs)
sol = vmx(mat, bcoef(0:nrank-1))
WRITE(*,'(/6x,3a12)') 'A*x', 'rhs', 'Err'
errmx = 0.0d0
DO i=1,nrank
WRITE(*,'(i6,3(1pe12.3))') i, sol(i), rhs(i), sol(i)-rhs(i)
errmx=MAX(errmx,ABS(sol(i)-rhs(i)))
END DO
WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx
!
! Factor and solve
!
CALL factor(mat)
CALL bsolve(mat, rhs, sol)
WRITE(*,'(/6x,3a12)') 'Computed', 'Exact', 'Err'
errmx = 0.0d0
DO i=1,nrank
WRITE(*,'(i6,3(1pe12.3))') i, sol(i), bcoef(i-1), sol(i)-bcoef(i-1)
errmx=MAX(errmx,ABS(sol(i)-bcoef(i-1)))
END DO
WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx
!===========================================================================
! 9.0 Clean up
!
DEALLOCATE(xgrid)
DEALLOCATE(bcoef)
DEALLOCATE(rhs, sol)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE coefeq_mass(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Mass matrix
!
c(1) = 1.0d0
idt(1) = 0
idw(1) = 0
END SUBROUTINE coefeq_mass
END PROGRAM main
diff --git a/examples/pde1dp_cmpl.f90 b/examples/pde1dp_cmpl.f90
index 48b2d1e..44ca51b 100644
--- a/examples/pde1dp_cmpl.f90
+++ b/examples/pde1dp_cmpl.f90
@@ -1,403 +1,403 @@
!>
!> @file pde1dp_cmpl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde1dp_cmpl_mod
USE bsplines
USE matrix
IMPLICIT NONE
!
CONTAINS
SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
!
TYPE(spline1d) :: spl
INTEGER, INTENT(in) :: mmode
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
DOUBLE COMPLEX, INTENT(out) :: rhs(:)
!
INTEGER :: dim, nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, it, irow
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
nrank = SIZE(rhs)
CALL get_dim(spl, dim, nx, nidbas)
!
ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(:) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss) * rhseq(xgauss(igauss))
DO it=0,nidbas
irow=MODULO(i+it-1,nx) + 1 ! Periodic BC
rhs(irow) = rhs(irow) + contrib*fun(it,1)
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
CONTAINS
DOUBLE COMPLEX FUNCTION rhseq(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: arg
arg = mmode*x
rhseq = (mmode**2*alpha-beta)*COS(arg)
END FUNCTION rhseq
END SUBROUTINE disrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat, alpha, beta)
!
TYPE(spline1d) :: spl
TYPE(zperiodic_mat) :: mat
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
!
INTEGER :: dim, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, dim, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC
jcol=MODULO(i+jt-1,nx) + 1
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
!
c(1) = alpha
idt(1) = 1
idw(1) = 1
!
c(2) = -beta
idt(2) = 0
idw(2) = 0
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE save_mat(fid, label, mat)
!
! Save zperiodic_mat in dense format
!
USE futils
!
INTEGER, INTENT(in) :: fid
CHARACTER(len=*), INTENT(in) :: label
TYPE(zperiodic_mat) :: mat
INTEGER :: j, n
DOUBLE COMPLEX, ALLOCATABLE :: fullmat(:,:)
!
n=mat%mat%rank
ALLOCATE(fullmat(n,n))
DO j=1,n
CALL getcol(mat, j, fullmat(:,j))
END DO
CALL putarr(fid, label, fullmat)
DEALLOCATE(fullmat)
END SUBROUTINE save_mat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
FUNCTION norm2(x)
!
! Compute the 2-norm of complex array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
!
sum2 = DOT_PRODUCT(x,x)
norm2 = SQRT(sum2)
END FUNCTION norm2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(mmode, x)
!
! Construct a 1d non-equidistant mesh given a
! mesh distribution function.
!
INTEGER, INTENT(in) :: mmode
DOUBLE PRECISION, INTENT(inout) :: x(0:)
INTEGER :: nx, nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
nx = SIZE(x)-1
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = 2.0 + COS(mmode*x)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde1dp_cmpl_mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!
PROGRAM main
!
! 1D complex PDE with periodic BC
!
USE pde1dp_cmpl_mod
USE bsplines
USE matrix
USE futils
!
IMPLICIT NONE
TYPE(spline1d) :: splx
TYPE(zperiodic_mat) :: mat
INTEGER :: kl, ku, nrank
!
CHARACTER(len=128) :: file='pde1dp_cmpl.h5'
INTEGER :: fid
INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
LOGICAL :: nlequid
DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
DOUBLE PRECISION :: dx
DOUBLE COMPLEX :: alpha, beta
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: err_norm
INTEGER :: i
!
NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE.
mmode = 1 ! Fourier mode
alpha = (1.0, 1.0) ! Complex "diffusion"
beta = 1.0
npt = 100
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
dx = 2.d0*pi/REAL(nx,8)
xgrid = (/ (i*dx,i=0,nx) /)
IF( .NOT. nlequid ) THEN
CALL meshdist(mmode, xgrid)
END IF
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL putarr(fid, '/xgrid', xgrid)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up periodic spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
WRITE(*,'(a,l6)') 'nlequid =', nlequid
nrank = nx ! Rank of the FE matrix
!
! FE matrix assembly
!
kl = nidbas
ku = kl
CALL init(kl, ku, nrank, 2, mat)
CALL get_dim(splx, dim)
WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim
CALL dismat(splx, mat, alpha, beta)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(splx, rhs, mmode, alpha, beta)
!
CALL save_mat(fid, '/mat', mat)
CALL putarr(fid, '/rhs', rhs)
!
! Factor and solve
!
CALL factor(mat)
CALL bsolve(mat, rhs, sol)
CALL putarr(fid, '/sol', sol)
!===========================================================================
! 3.0 Check solution
!
! Exact solution
ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
dx=2.0d0*pi/REAL(npt,8)
x = (/ (i*dx, i=0,npt) /)
solana = COS(mmode*x)
!
! Prolongate solution using periodicity
!
ALLOCATE(bcoef(dim))
bcoef(1:nrank) = sol(1:nrank)
DO i=nrank+1,dim
bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
END DO
!
! Interpolate field
!
CALL gridval(splx, x, solcal, 0, bcoef)
!
err = ABS(solcal-solana)
CALL putarr(fid, '/x', x)
CALL putarr(fid, '/solana', solana)
CALL putarr(fid, '/solcal', solcal)
CALL putarr(fid, '/err', err)
!
! Compute discretization error norm by Gauss integration
!
err_norm=0.0
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
DO i=1,nx
CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
solana(1:ngauss) = COS(mmode*xgauss)
err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
& solana(1:ngauss)-solcal(1:ngauss))
err_norm = err_norm + SUM(wgauss*err(1:ngauss))
END DO
err_norm = SQRT(err_norm)
WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm
!
DEALLOCATE(x, solcal, solana, err)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(bcoef)
!===========================================================================
! 9.0 Clean up
!
DEALLOCATE(xgrid)
DEALLOCATE(rhs, sol)
CALL destroy(mat)
CALL destroy_sp(splx)
CALL closef(fid)
END PROGRAM main
diff --git a/examples/pde1dp_cmpl_dft.f90 b/examples/pde1dp_cmpl_dft.f90
index d866885..9fbf2dc 100644
--- a/examples/pde1dp_cmpl_dft.f90
+++ b/examples/pde1dp_cmpl_dft.f90
@@ -1,290 +1,290 @@
!>
!> @file pde1dp_cmpl_dft.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde1dp_cmpl_dft_mod
USE bsplines
IMPLICIT NONE
!
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat, alpha, beta)
!
USE bsplines
TYPE(spline1d) :: spl
DOUBLE COMPLEX :: mat(:)
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
!
INTEGER :: dim, nx, nidbas, ngauss, intv, igauss
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, dim, nx, nidbas)
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
ALLOCATE(ft_fun(0:nx-1,2)) ! Up to first derivative
!
! Weak form: integration on first interval
!
intv = 1
CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
mat = 0.0d0
DO igauss=1,ngauss
CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv)
mat(:) = mat(:) + wgauss(igauss) * ( &
& alpha*ft_fun(:,2)*CONJG(ft_fun(:,2)) &
& - beta*ft_fun(:,1)*CONJG(ft_fun(:,1)) &
& )
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(ft_fun)
DEALLOCATE(xgauss, wgauss)
!
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
!
TYPE(spline1d) :: spl
INTEGER, INTENT(in) :: mmode
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
DOUBLE COMPLEX, INTENT(out) :: rhs(:)
!
INTEGER :: dim, nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, it, irow
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
nrank = SIZE(rhs)
CALL get_dim(spl, dim, nx, nidbas)
!
ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(:) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss) * rhseq(xgauss(igauss))
DO it=0,nidbas
irow=MODULO(i+it-1,nx) + 1 ! Periodic BC
rhs(irow) = rhs(irow) + contrib*fun(it,1)
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
CONTAINS
DOUBLE COMPLEX FUNCTION rhseq(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: arg
arg = mmode*x
rhseq = (mmode**2*alpha-beta)*COS(arg)
END FUNCTION rhseq
END SUBROUTINE disrhs
END MODULE pde1dp_cmpl_dft_mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!
PROGRAM main
!
! 1D complex PDE with periodic BC, using DFT
!
USE pde1dp_cmpl_dft_mod
USE bsplines
USE matrix
USE futils
USE fft
!
IMPLICIT NONE
TYPE(spline1d) :: splx
DOUBLE COMPLEX, ALLOCATABLE :: mat(:)
INTEGER ::nrank
!
CHARACTER(len=128) :: file='pde1dp_cmpl_dft.h5'
INTEGER :: fid
INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
DOUBLE PRECISION :: dx
DOUBLE COMPLEX :: alpha, beta
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol_shifted, rhs_shifted
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: err_norm
INTEGER :: i
INTEGER :: k, kmin, kmax
!
NAMELIST /newrun/ nx, nidbas, ngauss, alpha, beta, mmode, npt
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
mmode = 1 ! Fourier mode
alpha = (1.0, 1.0) ! Complex "diffusion"
beta = 1.0
npt = 100
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
dx = 2.d0*pi/REAL(nx,8)
xgrid = (/ (i*dx,i=0,nx) /)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL putarr(fid, '/xgrid', xgrid)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up periodic spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
nrank = nx ! Rank of the FE matrix
CALL get_dim(splx, dim)
WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
!
! Init DFT
kmin = -nx/2
kmax = nx/2-1
CALL init_dft(splx, kmin, kmax)
!
! FE matrix assembly in Fourier space
!
ALLOCATE(mat(0:nx-1))
CALL dismat(splx, mat, alpha, beta)
CALL putarr(fid, '/mat', mat)
!
! RHS assembly in real space
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(splx, rhs, mmode, alpha, beta)
!===========================================================================
! 3.0 Solve the dicretized system
!
! Fourier solve
!
CALL putarr(fid, '/rhs', rhs)
!
CALL fourcol(rhs, 1)
!
ALLOCATE(rhs_shifted(kmin:kmax))
ALLOCATE(sol_shifted(kmin:kmax))
DO k=kmin,kmax
rhs_shifted(k) = rhs(MODULO(k+nx,nx)+1)/REAL(nx,8)
END DO
sol_shifted = rhs_shifted / mat
DO k=kmin,kmax
sol(MODULO(k+nx,nx)+1) = sol_shifted(k)
END DO
!
CALL putarr(fid, '/rhs_fft', rhs)
CALL putarr(fid, '/sol_fft', sol)
!
! Solution in real space
!
CALL fourcol(sol,-1)
CALL putarr(fid, '/sol', sol)
!===========================================================================
! 4.0 Check solution
!
! Exact solution
ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
dx=2.0d0*pi/REAL(npt,8)
x = (/ (i*dx, i=0,npt) /)
solana = COS(mmode*x)
!
! Prolongate solution using periodicity
!
ALLOCATE(bcoef(dim))
bcoef(1:nrank) = sol(1:nrank)
DO i=nrank+1,dim
bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
END DO
!
! Interpolate field
!
CALL gridval(splx, x, solcal, 0, bcoef)
!
err = ABS(solcal-solana)
CALL putarr(fid, '/x', x)
CALL putarr(fid, '/solana', solana)
CALL putarr(fid, '/solcal', solcal)
CALL putarr(fid, '/err', err)
!
! Compute discretization error norm by Gauss integration
!
err_norm=0.0
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
DO i=1,nx
CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
solana(1:ngauss) = COS(mmode*xgauss)
err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
& solana(1:ngauss)-solcal(1:ngauss))
err_norm = err_norm + SUM(wgauss*err(1:ngauss))
END DO
err_norm = SQRT(err_norm)
WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm
!!
!===========================================================================
! 9.0 Clean up
!
DEALLOCATE(xgrid)
DEALLOCATE(mat)
CALL destroy_sp(splx)
CALL closef(fid)
END PROGRAM main
diff --git a/examples/pde1dp_cmpl_mumps.f90 b/examples/pde1dp_cmpl_mumps.f90
index ab14f2a..ef2f1d8 100644
--- a/examples/pde1dp_cmpl_mumps.f90
+++ b/examples/pde1dp_cmpl_mumps.f90
@@ -1,478 +1,478 @@
!>
!> @file pde1dp_cmpl_mumps.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde1dp_cmpl_mumps_mod
USE bsplines
USE mumps_bsplines
IMPLICIT NONE
!
CONTAINS
SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
!
TYPE(spline1d) :: spl
INTEGER, INTENT(in) :: mmode
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
DOUBLE COMPLEX, INTENT(out) :: rhs(:)
!
INTEGER :: dim, nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, it, irow
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
nrank = SIZE(rhs)
CALL get_dim(spl, dim, nx, nidbas)
!
ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(:) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss) * rhseq(xgauss(igauss))
DO it=0,nidbas
irow=MODULO(i+it-1,nx) + 1 ! Periodic BC
rhs(irow) = rhs(irow) + contrib*fun(it,1)
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
CONTAINS
DOUBLE COMPLEX FUNCTION rhseq(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: arg
arg = mmode*x
rhseq = (mmode**2*alpha-beta)*COS(arg)
END FUNCTION rhseq
END SUBROUTINE disrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat, alpha, beta)
!
TYPE(spline1d) :: spl
TYPE(zmumps_mat) :: mat
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
!
INTEGER :: dim, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
!
INTEGER :: istart, iend
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, dim, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
istart = mat%istart
iend = mat%iend
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO iw=0,nidbas
irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC
IF( irow.GE.istart .AND. irow.LE.iend) THEN
DO jt=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
jcol=MODULO(i+jt-1,nx) + 1
CALL updtmat(mat, irow, jcol, contrib)
END DO
END IF
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
!
c(1) = alpha
idt(1) = 1
idw(1) = 1
!
c(2) = -beta
idt(2) = 0
idw(2) = 0
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
FUNCTION norm2(x)
!
! Compute the 2-norm of complex array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
!
sum2 = DOT_PRODUCT(x,x)
norm2 = SQRT(sum2)
END FUNCTION norm2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(mmode, x)
!
! Construct a 1d non-equidistant mesh given a
! mesh distribution function.
!
INTEGER, INTENT(in) :: mmode
DOUBLE PRECISION, INTENT(inout) :: x(0:)
INTEGER :: nx, nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
nx = SIZE(x)-1
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = 2.0 + COS(mmode*x)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde1dp_cmpl_mumps_mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!
PROGRAM main
!
! 1D complex PDE with periodic BC
!
USE pde1dp_cmpl_mumps_mod
USE futils
!
IMPLICIT NONE
INCLUDE 'mpif.h'
TYPE(spline1d) :: splx
TYPE(zmumps_mat) :: mat
TYPE(zmumps_mat) :: newmat
INTEGER :: kl, ku, nrank
!
CHARACTER(len=128) :: file='pde1dp_cmpl_mumps.h5'
INTEGER :: fid
INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
LOGICAL :: nlequid
LOGICAL :: nlsym, nlherm, nlpos
DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
DOUBLE PRECISION :: dx
DOUBLE COMPLEX :: alpha, beta
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: err_norm
INTEGER :: i
INTEGER :: ierr, me
INTEGER :: nzfact
!
NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
& nlsym, nlherm, nlpos
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE.
mmode = 1 ! Fourier mode
alpha = (1.0, 1.0) ! Complex "diffusion"
beta = 1.0
npt = 100
nlsym = .TRUE. ! Is matrice symmetric
nlherm = .FALSE. ! Is matrice hermitian
nlpos = .TRUE. ! and positive definite ?
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlequid, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mmode, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(npt, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlherm, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
dx = 2.d0*pi/REAL(nx,8)
xgrid = (/ (i*dx,i=0,nx) /)
IF( .NOT. nlequid ) THEN
CALL meshdist(mmode, xgrid)
END IF
IF(me.EQ.0) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Create hdf5 file
!
IF(me.EQ.0) THEN
CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL putarr(fid, '/xgrid', xgrid)
END IF
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up periodic spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
CALL get_dim(splx, dim)
nrank = nx ! Rank of the FE matrix
!
! FE matrix assembly
!
CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
WRITE(*,'(a,i4.4,a,3i6)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
& mat%iend-mat%istart+1
!
CALL dismat(splx, mat, alpha, beta)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(splx, rhs, mmode, alpha, beta)
!
IF(me.EQ.0) CALL putarr(fid, '/rhs', rhs)
!
! Factor and solve
!
CALL factor(mat, debug=.FALSE.)
CALL bsolve(mat, rhs, sol, debug=.FALSE.)
!
nzfact = mat%mumps_par%INFOG(29)
IF(nzfact<0) THEN
nzfact = -nzfact*1000000
END IF
IF(me.EQ.0) THEN
CALL putarr(fid, '/sol', sol)
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat)
WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',nzfact
!
! Compute residue
!
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
END IF
!===========================================================================
! 3.0 Check solution
!
IF(me.EQ.0) THEN
!
! Exact solution
ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
dx=2.0d0*pi/REAL(npt,8)
x = (/ (i*dx, i=0,npt) /)
solana = COS(mmode*x)
!
! Prolongate solution using periodicity
!
ALLOCATE(bcoef(dim))
bcoef(1:nrank) = sol(1:nrank)
DO i=nrank+1,dim
bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
END DO
!
! Interpolate field
!
CALL gridval(splx, x, solcal, 0, bcoef)
!
err = ABS(solcal-solana)
CALL putarr(fid, '/x', x)
CALL putarr(fid, '/solana', solana)
CALL putarr(fid, '/solcal', solcal)
CALL putarr(fid, '/err', err)
!
! Compute discretization error norm by Gauss integration
!
err_norm=0.0
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
DO i=1,nx
CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
solana(1:ngauss) = COS(mmode*xgauss)
err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
& solana(1:ngauss)-solcal(1:ngauss))
err_norm = err_norm + SUM(wgauss*err(1:ngauss))
END DO
err_norm = SQRT(err_norm)
WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm
END IF
!
!===========================================================================
! 4.0 Test of getrow/putrow, getcol/putcol and mcopy
!
CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
ALLOCATE(arow(nrank), newsol(nrank))
!
DO i=1,nrank
CALL getrow(mat, i, arow)
CALL putrow(newmat, i, arow)
END DO
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(/a)') 'putrow/getrow ...'
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol)
!
DO i=1,nrank
CALL getcol(mat, i, arow)
CALL putcol(newmat, i, arow)
END DO
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(/a)') 'putcol/getcol ...'
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol)
!
CALL clear_mat(newmat)
CALL mcopy(mat, newmat)
WRITE(*,'(/a)') 'mcopy ...'
newmat%val = (1000.0d0,0.0d0)*newmat%val
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(a)') 'Backsolve the new system'
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
!
WRITE(*,'(a)') 'Destroy NEWMAT ...'
CALL destroy(newmat)
!
CALL bsolve(mat, rhs, sol)
WRITE(*,'(/a)') 'Backsolve the old system'
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
!
WRITE(*,'(a)') 'Destroy MAT ...'
CALL destroy(mat)
!
!!$ WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!'
!!$ CALL bsolve(newmat, rhs, newsol)
!===========================================================================
! 9.0 Clean up
!
IF(me.EQ.0) THEN
DEALLOCATE(x, solcal, solana, err)
DEALLOCATE(bcoef)
DEALLOCATE(xgauss, wgauss)
END IF
DEALLOCATE(xgrid)
DEALLOCATE(rhs, sol)
DEALLOCATE(arow, newsol)
CALL destroy_sp(splx)
IF(me.EQ.0) CALL closef(fid)
CALL mpi_finalize(ierr)
END PROGRAM main
diff --git a/examples/pde1dp_cmpl_pardiso.f90 b/examples/pde1dp_cmpl_pardiso.f90
index e3cf4a5..fccd3f6 100644
--- a/examples/pde1dp_cmpl_pardiso.f90
+++ b/examples/pde1dp_cmpl_pardiso.f90
@@ -1,457 +1,457 @@
!>
!> @file pde1dp_cmpl_pardiso.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde1dp_cmpl_pardiso_mod
USE bsplines
USE pardiso_bsplines
IMPLICIT NONE
!
CONTAINS
SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
!
TYPE(spline1d) :: spl
INTEGER, INTENT(in) :: mmode
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
DOUBLE COMPLEX, INTENT(out) :: rhs(:)
!
INTEGER :: dim, nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, it, irow
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
nrank = SIZE(rhs)
CALL get_dim(spl, dim, nx, nidbas)
!
ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(:) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss) * rhseq(xgauss(igauss))
DO it=0,nidbas
irow=MODULO(i+it-1,nx) + 1 ! Periodic BC
rhs(irow) = rhs(irow) + contrib*fun(it,1)
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
CONTAINS
DOUBLE COMPLEX FUNCTION rhseq(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: arg
arg = mmode*x
rhseq = (mmode**2*alpha-beta)*COS(arg)
END FUNCTION rhseq
END SUBROUTINE disrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat, alpha, beta)
!
TYPE(spline1d) :: spl
TYPE(zpardiso_mat) :: mat
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
!
INTEGER :: dim, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, dim, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC
jcol=MODULO(i+jt-1,nx) + 1
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
!
c(1) = alpha
idt(1) = 1
idw(1) = 1
!
c(2) = -beta
idt(2) = 0
idw(2) = 0
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
FUNCTION norm2(x)
!
! Compute the 2-norm of complex array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
!
sum2 = DOT_PRODUCT(x,x)
norm2 = SQRT(sum2)
END FUNCTION norm2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(mmode, x)
!
! Construct a 1d non-equidistant mesh given a
! mesh distribution function.
!
INTEGER, INTENT(in) :: mmode
DOUBLE PRECISION, INTENT(inout) :: x(0:)
INTEGER :: nx, nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
nx = SIZE(x)-1
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = 2.0 + COS(mmode*x)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde1dp_cmpl_pardiso_mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!
PROGRAM main
!
! 1D complex PDE with periodic BC
!
USE pde1dp_cmpl_pardiso_mod
USE futils
USE conmat_mod
!
IMPLICIT NONE
TYPE(spline1d) :: splx
TYPE(zpardiso_mat) :: mat
TYPE(zpardiso_mat) :: newmat
INTEGER :: kl, ku, nrank
!
CHARACTER(len=128) :: file='pde1dp_cmpl_pardiso.h5'
INTEGER :: fid
INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
LOGICAL :: nlequid
LOGICAL :: nlsym, nlherm, nlpos
DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
DOUBLE PRECISION :: dx
DOUBLE COMPLEX :: alpha, beta
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: err_norm
INTEGER :: i
!
NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
& nlsym, nlherm, nlpos
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE.
mmode = 1 ! Fourier mode
alpha = (1.0, 1.0) ! Complex "diffusion"
beta = 1.0
npt = 100
nlsym = .TRUE. ! Is matrice symmetric
nlherm = .FALSE. ! Is matrice hermitian
nlpos = .TRUE. ! and positive definite ?
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
dx = 2.d0*pi/REAL(nx,8)
xgrid = (/ (i*dx,i=0,nx) /)
IF( .NOT. nlequid ) THEN
CALL meshdist(mmode, xgrid)
END IF
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL putarr(fid, '/xgrid', xgrid)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up periodic spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
WRITE(*,'(a,l6)') 'nlequid =', nlequid
nrank = nx ! Rank of the FE matrix
!
! FE matrix assembly
!
CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
CALL get_dim(splx, dim)
WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
!!$ CALL dismat(splx, mat, alpha, beta)
CALL conmat(splx, mat, coefeq)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(splx, rhs, mmode, alpha, beta)
!
CALL putarr(fid, '/rhs', rhs)
!
! Factor and solve
!
WRITE(*,'(a/(10i6))') 'iparm', mat%p%iparm
CALL factor(mat)
CALL putmat(fid,'/MAT', mat)
CALL bsolve(mat, rhs, sol)
CALL putarr(fid, '/sol', sol)
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat)
WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18)
!
! Compute residue
!
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
!===========================================================================
! 3.0 Check solution
!
! Exact solution
ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
dx=2.0d0*pi/REAL(npt,8)
x = (/ (i*dx, i=0,npt) /)
solana = COS(mmode*x)
!
! Prolongate solution using periodicity
!
ALLOCATE(bcoef(dim))
bcoef(1:nrank) = sol(1:nrank)
DO i=nrank+1,dim
bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
END DO
!
! Interpolate field
!
CALL gridval(splx, x, solcal, 0, bcoef)
!
err = ABS(solcal-solana)
CALL putarr(fid, '/x', x)
CALL putarr(fid, '/solana', solana)
CALL putarr(fid, '/solcal', solcal)
CALL putarr(fid, '/err', err)
!
! Compute discretization error norm by Gauss integration
!
err_norm=0.0
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
DO i=1,nx
CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
solana(1:ngauss) = COS(mmode*xgauss)
err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
& solana(1:ngauss)-solcal(1:ngauss))
err_norm = err_norm + SUM(wgauss*err(1:ngauss))
END DO
err_norm = SQRT(err_norm)
WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm
!
!===========================================================================
! 4.0 Test of getrow/putrow, getcol/putcol and mcopy
!
CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
ALLOCATE(arow(nrank), newsol(nrank))
!
DO i=1,nrank
CALL getrow(mat, i, arow)
CALL putrow(newmat, i, arow)
END DO
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(/a)') 'putrow/getrow ...'
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol)
!
DO i=1,nrank
CALL getcol(mat, i, arow)
CALL putcol(newmat, i, arow)
END DO
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(/a)') 'putcol/getcol ...'
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol)
!
CALL clear_mat(newmat)
CALL mcopy(mat, newmat)
WRITE(*,'(/a)') 'mcopy ...'
newmat%val = (1000.0d0,0.0d0)*newmat%val
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(a)') 'Backsolve the new system'
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
!
WRITE(*,'(a)') 'Destroy NEWMAT ...'
CALL destroy(newmat)
!
CALL bsolve(mat, rhs, sol)
WRITE(*,'(/a)') 'Backsolve the old system'
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
!
WRITE(*,'(a)') 'Destroy MAT ...'
CALL destroy(mat)
!!$!
!!$ WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!'
!!$ CALL bsolve(newmat, rhs, newsol)
!!$ WRITE(*,'(a)') 'Backsolve the new system'
!!$ WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
!===========================================================================
! 9.0 Clean up
!
DEALLOCATE(x, solcal, solana, err)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(bcoef)
DEALLOCATE(xgrid)
DEALLOCATE(rhs, sol)
DEALLOCATE(arow, newsol)
CALL destroy_sp(splx)
CALL closef(fid)
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE COMPLEX, INTENT(out) :: c(:)
!
c(1) = alpha
idt(1) = 1
idw(1) = 1
!
c(2) = -beta
idt(2) = 0
idw(2) = 0
END SUBROUTINE coefeq
END PROGRAM main
diff --git a/examples/pde1dp_cmpl_wsmp.f90 b/examples/pde1dp_cmpl_wsmp.f90
index 1760563..3e12386 100644
--- a/examples/pde1dp_cmpl_wsmp.f90
+++ b/examples/pde1dp_cmpl_wsmp.f90
@@ -1,436 +1,436 @@
!>
!> @file pde1dp_cmpl_wsmp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde1dp_cmpl_wsmp_mod
USE bsplines
USE wsmp_bsplines
IMPLICIT NONE
!
CONTAINS
SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
!
TYPE(spline1d) :: spl
INTEGER, INTENT(in) :: mmode
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
DOUBLE COMPLEX, INTENT(out) :: rhs(:)
!
INTEGER :: dim, nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, it, irow
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
nrank = SIZE(rhs)
CALL get_dim(spl, dim, nx, nidbas)
!
ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(:) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss) * rhseq(xgauss(igauss))
DO it=0,nidbas
irow=MODULO(i+it-1,nx) + 1 ! Periodic BC
rhs(irow) = rhs(irow) + contrib*fun(it,1)
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
CONTAINS
DOUBLE COMPLEX FUNCTION rhseq(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: arg
arg = mmode*x
rhseq = (mmode**2*alpha-beta)*COS(arg)
END FUNCTION rhseq
END SUBROUTINE disrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat, alpha, beta)
!
TYPE(spline1d) :: spl
TYPE(zwsmp_mat) :: mat
DOUBLE COMPLEX, INTENT(in) :: alpha, beta
!
INTEGER :: dim, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, dim, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC
jcol=MODULO(i+jt-1,nx) + 1
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
!
c(1) = alpha
idt(1) = 1
idw(1) = 1
!
c(2) = -beta
idt(2) = 0
idw(2) = 0
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
FUNCTION norm2(x)
!
! Compute the 2-norm of complex array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2
!
sum2 = DOT_PRODUCT(x,x)
norm2 = SQRT(sum2)
END FUNCTION norm2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(mmode, x)
!
! Construct a 1d non-equidistant mesh given a
! mesh distribution function.
!
INTEGER, INTENT(in) :: mmode
DOUBLE PRECISION, INTENT(inout) :: x(0:)
INTEGER :: nx, nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
nx = SIZE(x)-1
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = 2.0 + COS(mmode*x)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde1dp_cmpl_wsmp_mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!
PROGRAM main
!
! 1D complex PDE with periodic BC
!
USE pde1dp_cmpl_wsmp_mod
USE futils
!
IMPLICIT NONE
TYPE(spline1d) :: splx
TYPE(zwsmp_mat) :: mat
TYPE(zwsmp_mat) :: newmat
INTEGER :: kl, ku, nrank
!
CHARACTER(len=128) :: file='pde1dp_cmpl_wsmp.h5'
INTEGER :: fid
INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
LOGICAL :: nlequid
LOGICAL :: nlsym, nlherm, nlpos
DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
DOUBLE PRECISION :: dx
DOUBLE COMPLEX :: alpha, beta
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: err_norm
INTEGER :: i
!
NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
& nlsym, nlherm, nlpos
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE.
mmode = 1 ! Fourier mode
alpha = (1.0, 1.0) ! Complex "diffusion"
beta = 1.0
npt = 100
nlsym = .TRUE. ! Is matrice symmetric
nlherm = .FALSE. ! Is matrice hermitian
nlpos = .TRUE. ! and positive definite ?
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
dx = 2.d0*pi/REAL(nx,8)
xgrid = (/ (i*dx,i=0,nx) /)
IF( .NOT. nlequid ) THEN
CALL meshdist(mmode, xgrid)
END IF
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL putarr(fid, '/xgrid', xgrid)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up periodic spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
WRITE(*,'(a,l6)') 'nlequid =', nlequid
nrank = nx ! Rank of the FE matrix
!
! FE matrix assembly
!
CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
CALL get_dim(splx, dim)
WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
CALL dismat(splx, mat, alpha, beta)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(splx, rhs, mmode, alpha, beta)
!
CALL putarr(fid, '/rhs', rhs)
!
! Factor and solve
!
!!$ CALL factor(mat, nlmetis=.TRUE.)
CALL factor(mat)
CALL putmat(fid,'/MAT', mat)
CALL bsolve(mat, rhs, sol)
CALL putarr(fid, '/sol', sol)
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat)
WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18)
!
! Compute residue
!
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
!===========================================================================
! 3.0 Check solution
!
! Exact solution
ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
dx=2.0d0*pi/REAL(npt,8)
x = (/ (i*dx, i=0,npt) /)
solana = COS(mmode*x)
!
! Prolongate solution using periodicity
!
ALLOCATE(bcoef(dim))
bcoef(1:nrank) = sol(1:nrank)
DO i=nrank+1,dim
bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
END DO
!
! Interpolate field
!
CALL gridval(splx, x, solcal, 0, bcoef)
!
err = ABS(solcal-solana)
CALL putarr(fid, '/x', x)
CALL putarr(fid, '/solana', solana)
CALL putarr(fid, '/solcal', solcal)
CALL putarr(fid, '/err', err)
!
! Compute discretization error norm by Gauss integration
!
err_norm=0.0
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
DO i=1,nx
CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
solana(1:ngauss) = COS(mmode*xgauss)
err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
& solana(1:ngauss)-solcal(1:ngauss))
err_norm = err_norm + SUM(wgauss*err(1:ngauss))
END DO
err_norm = SQRT(err_norm)
WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm
!
!===========================================================================
! 4.0 Test of getrow/putrow, getcol/putcol and mcopy
!
CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
ALLOCATE(arow(nrank), newsol(nrank))
!
DO i=1,nrank
CALL getrow(mat, i, arow)
CALL putrow(newmat, i, arow)
END DO
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(/a)') 'putrow/getrow ...'
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol)
!
DO i=1,nrank
CALL getcol(mat, i, arow)
CALL putcol(newmat, i, arow)
END DO
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(/a)') 'putcol/getcol ...'
WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol)
!
CALL clear_mat(newmat)
CALL mcopy(mat, newmat)
WRITE(*,'(/a)') 'mcopy ...'
newmat%val = (1000.0d0,0.0d0)*newmat%val
CALL factor(newmat)
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
!
CALL bsolve(mat, rhs, sol)
WRITE(*,'(a, i3)') 'Backsolve the system', mat%matid
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
!
CALL bsolve(newmat, rhs, newsol)
WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid
WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
!===========================================================================
! 9.0 Clean up
!
DEALLOCATE(x, solcal, solana, err)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(bcoef)
DEALLOCATE(xgrid)
DEALLOCATE(rhs, sol)
DEALLOCATE(arow, newsol)
CALL destroy(mat)
CALL destroy(newmat)
CALL destroy_sp(splx)
CALL closef(fid)
END PROGRAM main
diff --git a/examples/pde1dp_mod.f90 b/examples/pde1dp_mod.f90
index 0a508ac..7c8f6ba 100644
--- a/examples/pde1dp_mod.f90
+++ b/examples/pde1dp_mod.f90
@@ -1,225 +1,225 @@
!>
!> @file pde1dp_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde1dp_mod
!
USE bsplines
USE matrix
IMPLICIT NONE
DOUBLE PRECISION, ALLOCATABLE :: bcoef(:)
TYPE(spline1d), SAVE :: splx
!
CONTAINS
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function defined in FDIST
!
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly FE matrix (with periodic BC) mat using spline spl
!
TYPE(spline1d), INTENT(in) :: spl
TYPE(periodic_mat), INTENT(inout) :: mat
INTEGER :: dim, nx, nidbas, ngauss
INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl, dim, nx, nidbas)
ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
!
! Weak form
!
kterms = mat%mat%nterms
ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
CALL coefeq(xgauss(igauss), idert, iderw, coefs)
DO iterm=1,kterms
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
& fun(iw,iderw(iterm)) * wgauss(igauss)
irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC
jcol=MODULO(i+jt-1,nx) + 1
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
DEALLOCATE(iderw, idert, coefs)
END SUBROUTINE dismat
!
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
!
! Mass matrix
!
c(1) = 1.0d0
idt(1) = 0
idw(1) = 0
END SUBROUTINE coefeq
!+++
SUBROUTINE disrhs(spl, rhs)
!
! Assenbly the RHS using spline spl
!
TYPE(spline1d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: dim, nrank, nx, nidbas, ngauss
INTEGER :: i, igauss, it, irow
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
nrank = SIZE(rhs)
CALL get_dim(spl, dim, nx, nidbas)
!
ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!===========================================================================
! 2.0 Assembly loop
!
rhs(:) = 0.0d0
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
contrib = wgauss(igauss) * rhseq(xgauss(igauss))
DO it=0,nidbas
irow=MODULO(i+it-1,nx) + 1 ! Periodic BC
rhs(irow) = rhs(irow) + contrib*fun(it,1)
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
END SUBROUTINE disrhs
!
DOUBLE PRECISION FUNCTION rhseq(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: xarr(1), farr(1)
INTEGER, SAVE :: icall =0
xarr(1) = x
IF( icall.EQ.0 ) THEN
icall = icall+1
CALL gridval(splx, xarr, farr, 0, bcoef)
ELSE
CALL gridval(splx, xarr, farr, 0)
END IF
rhseq = farr(1)
END FUNCTION rhseq
END MODULE pde1dp_mod
diff --git a/examples/pde2d.f90 b/examples/pde2d.f90
index b881711..f46883f 100644
--- a/examples/pde2d.f90
+++ b/examples/pde2d.f90
@@ -1,409 +1,409 @@
!>
!> @file pde2d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 2d PDE using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
USE bsplines
USE matrix
USE futils
USE conmat_mod
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform, nlconmat
INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
TYPE(spline2d) :: splxy
TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde2d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
INTEGER :: nits=500
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline2d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(mbess, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
SUBROUTINE ibcmat(mat, ny)
USE matrix
TYPE(gbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
END SUBROUTINE ibcmat
SUBROUTINE ibcrhs(rhs, ny)
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
END SUBROUTINE ibcrhs
!!$ SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
!!$ DOUBLE PRECISION, INTENT(in) :: x, y
!!$ INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
!!$ DOUBLE PRECISION, INTENT(out) :: c(:)
!!$ END SUBROUTINE coefeq_poisson
END INTERFACE
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, &
& coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlconmat = .TRUE. ! Use CONMAT instead of DISMAT
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
!
CALL init(kl, ku, nrank, nterms, mat)
t0 = seconds()
IF(nlconmat) THEN
CALL conmat(splxy, mat, coefeq_poisson)
ELSE
CALL dismat(splxy, mat)
END IF
tmat = seconds() - t0
CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
ALLOCATE(arr(nrank))
!
! BC on Matrix
!
IF(nrank.LT.100) &
& WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:)
CALL ibcmat(mat, ny)
IF(nrank.LT.100) &
& WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC')
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, rhs, sol)
!
! Backtransform of solution
!
sol(1:ny-1) = sol(ny)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
!
tsolv = seconds() - t0
gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq_poisson
!
!+++
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_mumps.f90 b/examples/pde2d_mumps.f90
index 772fce2..8a2e120 100644
--- a/examples/pde2d_mumps.f90
+++ b/examples/pde2d_mumps.f90
@@ -1,937 +1,937 @@
!>
!> @file pde2d_mumps.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and MUMPS non-symmetric and symmetric
! matrix
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_mumps_mod
USE bsplines
USE mumps_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(mumps_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!
INTEGER :: istart, iend
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!!$ WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
!!$ WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!
! Matrix partition
!
istart = mat%istart
iend = mat%iend
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
IF( irow.GE.istart .AND. irow.LE.iend) THEN
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END IF
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(mumps_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
ALLOCATE(zsum(nrank), arr(nrank))
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
IF(mat%nlsym) THEN
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
END IF
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
IF( .NOT.mat%nlsym) THEN
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
zsum(ny:) = zsum(ny:) + arr(ny:)
END DO
CALL putcol(mat, ny, zsum)
END IF
!
! The away operator
!
IF( .NOT.mat%nlsym) THEN
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
END IF
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
DEALLOCATE(zsum)
DEALLOCATE(arr)
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
ALLOCATE(arr(nrank))
IF( .NOT.mat%nlsym) THEN
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
END IF
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
DEALLOCATE(arr)
!===========================================================================
! 9.0 Epilogue
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE write_matrix(lun, mat, comm)
!
! Write the distribute matrix to (single) file
!
INCLUDE 'mpif.h'
!
INTEGER :: lun
TYPE(mumps_mat) :: mat
INTEGER, INTENT(in) :: comm
!
INTEGER :: nprocs, me, ierr
INTEGER :: nrank, nnz, nnz_loc, istart, iend, nloc
INTEGER :: i
INTEGER, ALLOCATABLE :: displs(:), nlocs(:), cols(:), irow(:)
DOUBLE PRECISION, ALLOCATABLE :: val(:)
!
CALL mpi_comm_size(comm, nprocs, ierr)
CALL mpi_comm_rank(comm, me, ierr)
!
IF(.NOT.ASSOCIATED(mat%val)) THEN
WRITE(*,'(a)') 'WRITE_MATRIX: MUMPS matrix does not exist!'
STOP
END IF
!
! Info on matrix
!
!!$ IF(me.EQ.0) THEN
!!$ s0 = mat%nnz_start-1
!!$ DO i=mat%istart,mat%iend
!!$ s=mat%irow(i)-s0
!!$ e=mat%irow(i+1)-1-s0
!!$ WRITE(*,'(a,i6,1pe12.3)') 'nnz, Sum(val)', e-s+1, SUM(mat%val(s:e))
!!$ END DO
!!$ END IF
!
nrank = mat%rank
nnz_loc = mat%nnz_loc
nnz = mat%nnz
istart = mat%istart
iend = mat%iend
!
IF(me.EQ.0) THEN
WRITE(lun) nrank, nnz
END IF
!
! Write irow
!
nloc = iend-istart+1
IF (me.EQ.0) THEN
ALLOCATE(displs(0:nprocs))
ALLOCATE(nlocs(0:nprocs-1))
ALLOCATE(irow(nrank+1))
END IF
CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr)
IF(me.EQ.0) THEN
displs(0) = 0
DO i=0,nprocs-1
displs(i+1) = displs(i)+nlocs(i)
END DO
END IF
CALL mpi_gatherv(mat%irow, nloc, MPI_INTEGER, &
& irow, nlocs, displs, MPI_INTEGER, 0, comm, ierr)
IF(me.EQ.0) THEN
irow(nrank+1) = nnz+1
WRITE(lun) irow
DEALLOCATE(irow)
END IF
!
! Write cols
!
nloc = mat%nnz_loc
IF(me.EQ.0) THEN
ALLOCATE(cols(nnz))
END IF
CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr)
IF(me.EQ.0) THEN
displs(0) = 0
DO i=0,nprocs-1
displs(i+1) = displs(i)+nlocs(i)
END DO
END IF
CALL mpi_gatherv(mat%cols, nloc, MPI_INTEGER, &
& cols, nlocs, displs, MPI_INTEGER, 0, comm, ierr)
IF(me.EQ.0) THEN
WRITE(lun) cols
DEALLOCATE(cols)
END IF
!
! Write val (Same data partition as "cols"
!
IF(me.EQ.0) THEN
ALLOCATE(val(nnz))
END IF
CALL mpi_gatherv(mat%val, nloc, MPI_DOUBLE_PRECISION, &
& val, nlocs, displs, MPI_DOUBLE_PRECISION, 0, comm, ierr)
IF(me.EQ.0) THEN
WRITE(lun) val
DEALLOCATE(val)
END IF
!
! Epilogue
!
IF(me.EQ.0) THEN
DEALLOCATE(displs, nlocs)
END IF
END SUBROUTINE write_matrix
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_mumps_mod
PROGRAM main
USE pde2d_mumps_mod
USE futils
!
IMPLICIT NONE
INCLUDE 'mpif.h'
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: debug_mumps=.FALSE.
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
TYPE(spline2d) :: splxy
TYPE(mumps_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_mumps.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1
DOUBLE PRECISION :: tconv, treord
INTEGER :: nits=100
LOGICAL :: nlsym, nlpos
LOGICAL :: nlmetis, nlforce_zero
LOGICAL :: nlserial
!
INTEGER :: ierr, me
INTEGER(kind=8) :: nzfact
DOUBLE PRECISION :: mem_loc
!
CHARACTER(len=128) :: matfile=''
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym, nlpos,&
& nlmetis, nlforce_zero, nlserial, coefx, coefy, matfile, &
& debug_mumps
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlsym = .FALSE. ! Symmetric or unsymmetric matrix
nlpos = .TRUE. ! Positive definite matrix
nlmetis = .FALSE. ! Use metis ordering or minimum degree
nlserial = .TRUE. ! Serial. The solver is duplicated on each process. Otherwise
! the solver matrix is partionned among the processes.
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
matfile = '' ! Save matrix file to matfile if not empty
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlmetis, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(debug_mumps, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlserial, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
IF(me.EQ.0) THEN
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
END IF
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
IF(me.EQ.0) WRITE(*,'(a,i8)') 'nrank', nrank
!
IF(nlserial) THEN ! The solver is duplicated
CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,&
& nlsym=nlsym, nlpos=nlpos)
ELSE ! The solver is distributed
CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,&
& nlsym=nlsym, nlpos=nlpos, comm_in=MPI_COMM_WORLD)
END IF
mat%mumps_par%ICNTL(23) = 400
IF(me.EQ.0) THEN
WRITE(*,'(a/(20i6))') 'ICNTL =', mat%mumps_par%ICNTL
END IF
WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
& mat%iend-mat%istart+1
!
CALL dismat(splxy, mat)
!
! BC on Matrix
!
CALL ibcmat(mat, ny)
tmat = seconds() - t0
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
IF(me.EQ.0) THEN
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
END IF
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL to_mat(mat)
WRITE(*,'(a/(10i6))') 'MInmax IRN_loc', MINVAL(mat%mumps_par%IRN_loc), MAXVAL(mat%mumps_par%IRN_loc)
WRITE(*,'(a/(10i6))') 'JCN_loc', MINVAL(mat%mumps_par%JCN_loc), MAXVAL(mat%mumps_par%JCN_loc)
tconv = seconds() -t0
CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
IF(me.EQ.0) THEN
WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
END IF
!
! Write Matrix and RHS to file
!
IF(LEN_TRIM(matfile).GT.0) THEN
IF(me.EQ.0) THEN
OPEN(99, file=matfile, form='unformatted')
END IF
CALL write_matrix(99, mat, MPI_COMM_WORLD)
END IF
!
t0 = seconds()
CALL reord_mat(mat, nlmetis=nlmetis, debug=debug_mumps)
treord = seconds() - t0
!
t0 = seconds()
CALL numfact(mat, debug=debug_mumps)
tfact = seconds() - t0
!
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR')
IF(me.EQ.0) THEN
nzfact = mat%mumps_par%INFOG(29)
IF(nzfact<0) THEN
nzfact = -nzfact*1000000
END IF
WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',nzfact
WRITE(*,'(a,f12.2)') 'Number of factorization MFLOPS = ',&
& mat%mumps_par%RINFOG(3)/1.e6
END IF
gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9
!
CALL bsolve(mat, rhs, sol, debug=debug_mumps)
!
IF(LEN_TRIM(matfile).GT.0) THEN
IF(me.EQ.0) THEN
WRITE(99) rhs
WRITE(99) sol
CLOSE(99)
END IF
END IF
!
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL bsolve(mat, rhs, sol)
sol(1:ny-1) = sol(ny)
END DO
tsolv = (seconds() - t0)/REAL(nits)
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
END IF
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
IF(me.EQ.0) THEN
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv
WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
DEALLOCATE(solcal, solana, errsol)
END IF
!===========================================================================
! 5.0 Clear the matrix and recompute
!
IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
t0 = seconds()
CALL clear_mat(mat)
CALL dismat(splxy, mat)
CALL ibcmat(mat, ny)
tmat = seconds()-t0
!
t0 = seconds()
CALL numfact(mat, debug=debug_mumps)
tfact = seconds()-t0
gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9
!
t0 = seconds()
ALLOCATE(newsol(nrank))
CALL bsolve(mat, rhs, newsol)
newsol(1:ny-1) = newsol(ny)
tsolv = seconds()-t0
!
IF(me.EQ.0) THEN
WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
END IF
!
DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(bcoef)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
IF(me.EQ.0) CALL closef(fid)
CALL mpi_finalize(ierr)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
!
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
!
SUBROUTINE minmax_i(k, comm, str)
CHARACTER(len=*), INTENT(in) :: str
INTEGER, INTENT(in) :: k
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr, kmin, kmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
END IF
END SUBROUTINE minmax_i
!
SUBROUTINE minmax_r(x, comm, str)
CHARACTER(len=*), INTENT(in) :: str
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr
DOUBLE PRECISION :: xmin, xmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
END IF
END SUBROUTINE minmax_r
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_nh.f90 b/examples/pde2d_nh.f90
index ee5a008..98607b9 100644
--- a/examples/pde2d_nh.f90
+++ b/examples/pde2d_nh.f90
@@ -1,684 +1,684 @@
!>
!> @file pde2d_nh.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the following 2d PDE using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my),
! with BC: f(x=1,y) = cos(y)
!
! Exact solution: f(x,y) = (1-x^2) x^m cos(my) + x*cos(y)
!
MODULE pde2d_nh_mod
USE bsplines
USE matrix
IMPLICIT NONE
!
LOGICAL :: nlfix
CONTAINS
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
!
ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1)) !
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
DO iterm=1,kterms
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
contrib = fun1(iw1,iderw(iterm,1)) * &
& fun2(iw2,iderw(iterm,2)) * &
& coefs(iterm) * &
& fun2(it2,idert(iterm,2)) * &
& fun1(it1,idert(iterm,1)) * &
& wg1(ig1) * wg2(ig2)
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
IMPLICIT NONE
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, spl)
!
! Apply BC on matrix
!
IMPLICIT NONE
TYPE(gbmat), INTENT(inout) :: mat
TYPE(spline2d) :: spl
INTEGER :: nx, ndim1, nidbas1
INTEGER :: ny, ndim2, nidbas2
INTEGER :: kl, ku, nrank, i, j
INTEGER :: krow, kcol, jf
DOUBLE PRECISION :: yg
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:), fun(:,:)
!===========================================================================
! 1.0 Prologue
!
CALL get_dim(spl%sp1, ndim1, nx, nidbas1)
CALL get_dim(spl%sp2, ndim2, ny, nidbas2)
!
kl = mat%kl
ku = mat%ku
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
ALLOCATE(fun(0:nidbas2,1))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
DO i=ny,ny+kl
zsum(i) = zsum(i) + arr(i)
END DO
END DO
CALL putcol(mat, ny, zsum)
!
! The away operator
!
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
i=nx+nidbas1 ! The last spline in X
DO j=1,ny
krow=(i-1)*ny+j
IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN
yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0
ELSE
yg = spl%sp2%knots(j-1)
END IF
CALL basfun(yg, spl%sp2, fun, j)
arr = 0.0d0
DO jf=0,nidbas2
kcol=(i-1)*ny + MODULO(jf+j-1,ny)+1
arr(kcol) = arr(kcol)+fun(jf,1)
END DO
CALL putrow(mat, krow, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
DEALLOCATE(fun)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, spl)
!
! Apply BC on RHS
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
TYPE(spline2d) :: spl
INTEGER :: nx, ndim1, nidbas1
INTEGER :: ny, ndim2, nidbas2
INTEGER :: nrank
INTEGER :: i, j, k
DOUBLE PRECISION :: xg, yg, zsum
!===========================================================================
! 1.0 Prologue
!
CALL get_dim(spl%sp1, ndim1, nx, nidbas1)
CALL get_dim(spl%sp2, ndim2, ny, nidbas2)
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
i = nx+nidbas1 ! The last spline index on x
xg = spl%sp1%knots(nx) ! Right boundary radial coordinate
DO j=1,ny
k = (i-1)*ny + j
IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN
yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0
ELSE
yg = spl%sp2%knots(j-1)
END IF
rhs(k) = xg*COS(yg)
END DO
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_nh_mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PROGRAM main
!
USE pde2d_nh_mod
USE bsplines
USE matrix
USE futils
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
TYPE(spline2d) :: splxy
TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde2d_nh.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
INTEGER :: nits=500
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, coefx, coefy, nlfix
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
nlfix = .TRUE. ! Fix or not for even nidbas2
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
!
CALL init(kl, ku, nrank, nterms, mat)
CALL dismat(splxy, mat)
CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
ALLOCATE(arr(nrank))
!
! BC on Matrix
!
IF(nrank.LT.100) &
& WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:)
CALL ibcmat(mat, splxy)
tmat = seconds() - t0
IF(nrank.LT.100) &
& WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, splxy)
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC')
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, rhs, sol)
!
! Backtransform of solution
!
sol(1:ny-1) = sol(ny)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
!
tsolv = seconds() - t0
gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) &
& + xgrid(i)*COS(ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) &
& + COS(ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
CALL putarr(fid, '/errors_x', errsol, 'Errors in d/dx')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
& SIN(mbess*ygrid(j)) &
& -xgrid(i)*SIN(ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
CALL putarr(fid, '/errors_y', errsol, 'Errors in d/dy')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
!+++
diff --git a/examples/pde2d_pardiso.f90 b/examples/pde2d_pardiso.f90
index 276f727..53619d3 100644
--- a/examples/pde2d_pardiso.f90
+++ b/examples/pde2d_pardiso.f90
@@ -1,741 +1,741 @@
!>
!> @file pde2d_pardiso.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and PARDISO non-symmetric matrix
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_pardiso_mod
USE bsplines
USE pardiso_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(pardiso_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(pardiso_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
zsum(ny:) = zsum(ny:) + arr(ny:)
END DO
CALL putcol(mat, ny, zsum)
!
! The away operator
!
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq_poisson
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_pardiso_mod
PROGRAM main
USE pde2d_pardiso_mod
USE futils
USE conmat_mod
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform, nlconmat
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
TYPE(spline2d) :: splxy
TYPE(pardiso_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_pardiso.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
DOUBLE PRECISION :: tconv, treord
INTEGER :: nits=100
LOGICAL :: nlmetis, nlforce_zero
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
& nlforce_zero, nlconmat, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlmetis = .FALSE. ! Use metis ordering or minimum degree
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
nlconmat = .TRUE. ! Use CONMAT instead of DISMAT
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
WRITE(*,'(a,i8)') 'nrank', nrank
!
CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
t0 = seconds()
IF(nlconmat) THEN
CALL conmat(splxy, mat, coefeq_poisson)
ELSE
CALL dismat(splxy, mat)
END IF
tmat = seconds() - t0
ALLOCATE(arr(nrank))
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
END IF
!
! BC on Matrix
!
WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
CALL ibcmat(mat, ny)
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
WRITE(*,'(a)') 'Last rows'
DO i=nrank-ny,nrank
CALL getrow(mat, i, arr)
WRITE(*,'(10(1pe12.3))') arr
END DO
END IF
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
!
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL to_mat(mat)
tconv = seconds() -t0
WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
!
t0 = seconds()
CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
CALL putmat(fid, '/MAT', mat)
treord = seconds() - t0
!
t0 = seconds()
CALL numfact(mat, debug=.FALSE.)
tfact = seconds() - t0
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18)
WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19)
gflops1 = mat%p%iparm(19) / tfact / 1.d3
!
CALL bsolve(mat, rhs, sol, debug=.FALSE.)
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL bsolve(mat, rhs, sol)
sol(1:ny-1) = sol(ny)
END DO
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
tsolv = (seconds() - t0)/REAL(nits)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv
WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!===========================================================================
! 5.0 Clear the matrix and recompute
!
WRITE(*,'(/a)') 'Recompute the solver ...'
CALL clear_mat(mat)
t0 = seconds()
IF(nlconmat) THEN
CALL conmat(splxy, mat, coefeq_poisson)
ELSE
CALL dismat(splxy, mat)
END IF
tmat = seconds()-t0
CALL ibcmat(mat, ny)
!
t0 = seconds()
CALL numfact(mat, debug=.FALSE.)
tfact = seconds()-t0
gflops1 = mat%p%iparm(19) / tfact / 1.d3
!
t0 = seconds()
ALLOCATE(newsol(nrank))
CALL bsolve(mat, rhs, newsol)
newsol(1:ny-1) = newsol(ny)
tsolv = seconds()-t0
!
WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!
DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_pb.f90 b/examples/pde2d_pb.f90
index e764273..bd18342 100644
--- a/examples/pde2d_pb.f90
+++ b/examples/pde2d_pb.f90
@@ -1,696 +1,696 @@
!>
!> @file pde2d_pb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 2d PDE using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
USE bsplines
USE matrix
USE conmat_mod
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform, nlconmat
INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
TYPE(spline2d) :: splxy
TYPE(pbmat) :: mat
!
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
INTEGER :: nits=500
!
INTERFACE
SUBROUTINE dismat(spl, mat)
USE bsplines
USE matrix
TYPE(spline2d), INTENT(in) :: spl
TYPE(pbmat), INTENT(inout) :: mat
END SUBROUTINE dismat
SUBROUTINE disrhs(mbess, spl, rhs)
USE bsplines
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
END SUBROUTINE disrhs
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
SUBROUTINE ibcmat(mat, ny)
USE matrix
TYPE(pbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
END SUBROUTINE ibcmat
SUBROUTINE ibcrhs(rhs, ny)
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
END SUBROUTINE ibcrhs
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
END SUBROUTINE coefeq
END INTERFACE
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, &
& coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlconmat = .TRUE. ! Use CONMAT instead of DISMAT
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
!
CALL init(ku, nrank, nterms, mat)
t0 = seconds()
IF(nlconmat) THEN
CALL conmat(splxy, mat, coefeq)
ELSE
CALL dismat(splxy, mat)
END IF
tmat = seconds() - t0
ALLOCATE(arr(nrank))
!
! BC on Matrix
!
IF(nrank.LT.100) &
& WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(ku+1,:)
CALL ibcmat(mat, ny)
IF(nrank.LT.100) &
& WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(ku+1,:)
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, rhs, sol)
!
! Backtransform of solution
!
sol(1:ny-1) = sol(ny)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
!
tsolv = seconds() - t0
gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM at first call to gridval
IF(nlppform) THEN
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
END IF
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
!===========================================================================
!
CONTAINS
SUBROUTINE prntmat(str, a)
DOUBLE PRECISION, DIMENSION(:,:) :: a
CHARACTER(len=*) :: str
INTEGER :: i
WRITE(*,'(a)') TRIM(str)
DO i=1,SIZE(a,1)
WRITE(*,'(10f8.1)') a(i,:)
END DO
END SUBROUTINE prntmat
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
USE bsplines
USE matrix
IMPLICIT NONE
TYPE(spline2d), INTENT(in) :: spl
TYPE(pbmat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
USE bsplines
IMPLICIT NONE
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
USE matrix
IMPLICIT NONE
TYPE(pbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: kl, ku, nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
INTEGER :: i0, ii
INTEGER :: i0_arr(ny)
!===========================================================================
! 1.0 Prologue
!
ku = mat%ku
kl = ku
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!
i0 = nrank - ku
WRITE(*,'(a,i6)') 'Estimated i0', i0
DO i=1,ny
CALL getcol(mat, nrank-ny+i, arr)
DO ii=1,nrank
i0_arr(i)=ii
IF(arr(ii) .NE. 0.0d0) EXIT
END DO
END DO
!!$ WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr
!
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
!
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
CALL putrow(mat, ny, zsum)
!
! The away operator
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO i = nrank, nrank-ny+1, -1
CALL getcol(mat, i, arr)
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!+++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
diff --git a/examples/pde2d_petsc.f90 b/examples/pde2d_petsc.f90
index 4378b3f..54cd8db 100644
--- a/examples/pde2d_petsc.f90
+++ b/examples/pde2d_petsc.f90
@@ -1,795 +1,795 @@
!>
!> @file pde2d_petsc.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and PETSC matrix
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_petsc_mod
USE bsplines
USE petsc_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(petsc_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!
INTEGER :: istart, iend
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!!$ WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
!!$ WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!
! Matrix partition
!
istart = mat%istart
iend = mat%iend
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
IF( irow.GE.istart .AND. irow.LE.iend) THEN
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END IF
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(petsc_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
ALLOCATE(zsum(nrank), arr(nrank))
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
IF(mat%nlsym) THEN
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
END IF
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
IF( .NOT.mat%nlsym) THEN
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
zsum(ny:) = zsum(ny:) + arr(ny:)
END DO
CALL putcol(mat, ny, zsum)
END IF
!
! The away operator
!
IF( .NOT.mat%nlsym) THEN
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
END IF
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
DEALLOCATE(zsum)
DEALLOCATE(arr)
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
ALLOCATE(arr(nrank))
IF( .NOT.mat%nlsym) THEN
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
END IF
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
DEALLOCATE(arr)
!===========================================================================
! 9.0 Epilogue
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_petsc_mod
PROGRAM main
USE pde2d_petsc_mod
USE futils
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
INTEGER :: nitmax=10000, nits, nits0, ntrials=0
DOUBLE PRECISION :: rtol=1.e-9
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
DOUBLE PRECISION, ALLOCATABLE :: row_sum(:), row(:)
TYPE(spline2d) :: splxy
TYPE(petsc_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_petsc.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: mem, dopla
DOUBLE PRECISION :: t0, tmat, tsolv, tsolv0, tgrid, gflops1, gflops2
LOGICAL :: nlsym
LOGICAL :: nlforce_zero
!
INTEGER :: ierr, me
INTEGER(kind=8) :: nzfact
INTEGER :: nnz_loc, nnz
DOUBLE PRECISION :: mem_loc, mem_min, mem_max
!
CHARACTER(len=128) :: matfile='mat.dat'
logical :: file_exist
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym,&
& nlforce_zero, coefx, coefy, nitmax, rtol, ntrials, &
& matfile
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlsym = .FALSE. ! Symmetric or unsymmetric matrix
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
nitmax = 10000 ! Max number ofviterations
rtol = 1.e-9 ! Relative tolerance
ntrials = 0 ! Run ntrials solution steps after setup
matfile = '' ! Save matrix file to matfile if not empty
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ntrials, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
IF(me.EQ.0) THEN
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
END IF
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = mpi_wtime()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
IF(me.EQ.0) WRITE(*,'(a,i12 )') 'nrank', nrank
!
CALL init(nrank, nterms, mat, comm=MPI_COMM_WORLD)
!
INQUIRE(file=TRIM(matfile), exist=file_exist)
IF( file_exist ) THEN
t0 = mpi_wtime()
CALL load_mat(mat, matfile)
tmat = mpi_wtime()-t0
if(me.eq.0) WRITE(*,'(a,1pe12.3)') 'Mat read time (s) ', tmat
ELSE
t0 = mpi_wtime()
CALL dismat(splxy, mat)
CALL ibcmat(mat, ny)
!
!!$ ALLOCATE(row_sum(mat%istart:mat%iend))
!!$ ALLOCATE(row(mat%rank))
!!$ DO i=mat%istart,mat%iend
!!$ row = 0.0d0
!!$ CALL getrow(mat, i, row)
!!$ row_sum(i) = SUM(row)
!!$ END DO
!!$ WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum', row_sum
!
CALL to_mat(mat)
!
!!$ DO i=mat%istart,mat%iend
!!$ row = 0.0d0
!!$ CALL getrow(mat, i, row)
!!$ row_sum(i) = SUM(row)
!!$ END DO
!!$ WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum(after)', row_sum
CALL save_mat(mat, matfile)
tmat = mpi_wtime() - t0
IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat construction time (s) ', tmat
END IF
!
IF(me.EQ.0) THEN
WRITE(*,'(a,2i16)') 'Mat rank, nnz', mat%rank, mat%nnz
END IF
!
! RHS assembly
!
t0=mpi_wtime()
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
IF(me.EQ.0) THEN
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
END IF
IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'RHS construction time (s) ', mpi_wtime()-t0
!===========================================================================
! 3.0 Solve the dicretized system
!
CALL minmax_i8(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
IF(me.EQ.0) THEN
WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
END IF
!
t0 = mpi_wtime()
CALL bsolve(mat, rhs, sol, rtol, nitmax, nits0)
tsolv0 = mpi_wtime() - t0
IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve+setup time(s) and nits ', tsolv0, nits0
!
IF(ntrials .GT. 0) THEN
t0 = mpi_wtime()
DO it=1,ntrials ! ntrials iterations for timing
sol = 0.0d0
CALL bsolve(mat, rhs, sol, rtol, nitmax, nits)
sol(1:ny-1) = sol(ny)
END DO
tsolv = (mpi_wtime() - t0)/REAL(ntrials)
IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve time(s) and nits ', tsolv, nits
END IF
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
!
CALL destroy(mat)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after setting bcoef')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
IF(me.EQ.0) THEN
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
DEALLOCATE(solcal, solana, errsol)
END IF
!!$!===========================================================================
!!$! 5.0 Clear the matrix and recompute
!!$!
!!$ IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
!!$ t0 = mpi_wtime()()
!!$ CALL clear_mat(mat)
!!$ CALL dismat(splxy, mat)
!!$ CALL ibcmat(mat, ny)
!!$ tmat = mpi_wtime()()-t0
!!$!
!!$ t0 = mpi_wtime()()
!!$ CALL numfact(mat, debug=.FALSE.)
!!$ tfact = mpi_wtime()()-t0
!!$ gflops1 = mat%petsc_par%RINFOG(3) / tfact / 1.d9
!!$!
!!$ t0 = mpi_wtime()()
!!$ ALLOCATE(newsol(nrank))
!!$ CALL bsolve(mat, rhs, newsol)
!!$ newsol(1:ny-1) = newsol(ny)
!!$ tsolv = mpi_wtime()()-t0
!!$!
!!$ IF(me.EQ.0) THEN
!!$ WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
!!$ WRITE(*,'(/a)') '---'
!!$ WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
!!$ WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
!!$ WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
!!$ WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
!!$ WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!!$ END IF
!!$!
!!$ DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(bcoef)
!!$ CALL destroy(mat)
CALL destroy_sp(splxy)
!
IF(me.EQ.0) CALL closef(fid)
CALL mpi_finalize(ierr)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
!
SUBROUTINE minmax_i(k, comm, str)
CHARACTER(len=*), INTENT(in) :: str
INTEGER, INTENT(in) :: k
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr, kmin, kmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
END IF
END SUBROUTINE minmax_i
!
SUBROUTINE minmax_i8(k, comm, str)
CHARACTER(len=*), INTENT(in) :: str
INTEGER(8), INTENT(in) :: k
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr
INTEGER(8) :: kmin, kmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(k, kmin, 1, MPI_INTEGER8, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(k, kmax, 1, MPI_INTEGER8, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
END IF
END SUBROUTINE minmax_i8
!!
SUBROUTINE minmax_r(x, comm, str)
CHARACTER(len=*), INTENT(in) :: str
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr
DOUBLE PRECISION :: xmin, xmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
END IF
END SUBROUTINE minmax_r
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_pwsmp.f90 b/examples/pde2d_pwsmp.f90
index 1cd7592..7e001a3 100644
--- a/examples/pde2d_pwsmp.f90
+++ b/examples/pde2d_pwsmp.f90
@@ -1,776 +1,776 @@
!>
!> @file pde2d_pwsmp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and WSMP non-symmetric matrix
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_pwsmp_mod
USE bsplines
USE pwsmp_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(wsmp_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!
INTEGER :: istart, iend
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!
! Matrix partition
!
istart = mat%istart
iend = mat%iend
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
IF( irow.GE.istart .AND. irow.LE.iend) THEN
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END IF
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(wsmp_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
IF( .NOT.mat%nlsym) THEN
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
zsum(ny:) = zsum(ny:) + arr(ny:)
END DO
CALL putcol(mat, ny, zsum)
END IF
!
! The away operator
!
IF( .NOT.mat%nlsym) THEN
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
END IF
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_pwsmp_mod
PROGRAM main
USE pde2d_pwsmp_mod
USE futils
!
IMPLICIT NONE
INCLUDE 'mpif.h'
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
TYPE(spline2d) :: splxy
TYPE(wsmp_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_wsmp.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
DOUBLE PRECISION :: tconv, treord
INTEGER :: nits=100
LOGICAL :: nlsym, nlforce_zero
!
INTEGER :: ierr, me, nprocs
DOUBLE PRECISION :: mem_loc, mem_min, mem_max
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
& nlsym, nlforce_zero, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlsym = .TRUE. ! Symmetric matrix or not
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
if(me.eq.0) then
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
end if
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
IF(me.EQ.0) THEN
WRITE(*,'(a,i8,a,i4)') 'nrank =', nrank, ' nprocs =', nprocs
END IF
!
CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero, nlsym=nlsym, &
& comm_in=MPI_COMM_WORLD)
WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
& mat%iend-mat%istart+1
!
CALL dismat(splxy, mat)
!
! BC on Matrix
!
CALL ibcmat(mat, ny)
tmat = seconds() - t0
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
IF(me.EQ.0) THEN
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
END IF
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL to_mat(mat)
tconv = seconds() -t0
CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
IF(me.EQ.0) THEN
WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
END IF
!
t0 = seconds()
CALL reord_mat(mat)
treord = seconds() - t0
!
t0 = seconds()
CALL numfact(mat)
tfact = seconds() - t0
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR')
!
IF(me.EQ.0) THEN
WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9
END IF
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
CALL bsolve(mat, rhs, sol)
!
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL bsolve(mat, rhs, sol)
sol(1:ny-1) = sol(ny)
END DO
tsolv = (seconds() - t0)/REAL(nits)
!
mem_loc = mem()
CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
END IF
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method (only on proc 0)
!
IF(me.EQ.0) THEN
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv
WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
DEALLOCATE(solcal)
DEALLOCATE(solana)
DEALLOCATE(errsol)
END IF
!===========================================================================
! 5.0 Clear the matrix and recompute
!
IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
t0 = seconds()
CALL clear_mat(mat)
CALL dismat(splxy, mat)
CALL ibcmat(mat, ny)
tmat = seconds()-t0
!
t0 = seconds()
!!$ CALL numfact(mat)
CALL factor(mat, nlreord=.FALSE.)
tfact = seconds()-t0
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
t0 = seconds()
ALLOCATE(newsol(nrank))
CALL bsolve(mat, rhs, newsol)
newsol(1:ny-1) = newsol(ny)
tsolv = seconds()-t0
!
IF(me.EQ.0) THEN
WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
END IF
!
DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(bcoef)
DEALLOCATE(xgrid, rhs, sol)
9999 CONTINUE
CALL mpi_barrier(MPI_COMM_WORLD, ierr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
IF(me.EQ.0) CALL closef(fid)
CALL mpi_finalize(ierr)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
!
SUBROUTINE minmax_i(k, comm, str)
CHARACTER(len=*), INTENT(in) :: str
INTEGER, INTENT(in) :: k
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr, kmin, kmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
END IF
END SUBROUTINE minmax_i
!
SUBROUTINE minmax_r(x, comm, str)
CHARACTER(len=*), INTENT(in) :: str
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr
DOUBLE PRECISION :: xmin, xmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
END IF
END SUBROUTINE minmax_r
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_sym_pardiso.f90 b/examples/pde2d_sym_pardiso.f90
index 47c7cda..fcd23bf 100644
--- a/examples/pde2d_sym_pardiso.f90
+++ b/examples/pde2d_sym_pardiso.f90
@@ -1,715 +1,715 @@
!>
!> @file pde2d_sym_pardiso.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and PARDISO symmetric matrix
!
! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
! C(x,y) = 1 + \epsilon x cos(y)
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_sym_pardiso_mod
USE bsplines
USE pardiso_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, epsi, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(in) :: epsi
TYPE(pardiso_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
DOUBLE PRECISION :: zcoef
!
! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
! C(x,y) = 1 + epsilon*x*cos(y)
!
zcoef = 1.0d0 + epsi*x*COS(y)
!
c(1) = x*zcoef !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = zcoef/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, epsi, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
DOUBLE PRECISION, INTENT(in) :: epsi
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x, y, m)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(in) :: m
DOUBLE PRECISION :: xm
!
xm = REAL(m,8)
rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
& epsi*x*( &
& ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
& (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
& ))
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(pardiso_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
CALL putrow(mat, ny, zsum)
!
! The away operator
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_sym_pardiso_mod
PROGRAM main
USE pde2d_sym_pardiso_mod
USE futils
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
TYPE(spline2d) :: splxy
TYPE(pardiso_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
DOUBLE PRECISION :: tconv, treord
INTEGER :: nits=100
LOGICAL :: nlmetis, nlforce_zero, nlpos
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, epsi, nlppform, nlmetis, &
& nlforce_zero, nlpos, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlmetis = .FALSE. ! Use metis ordering or minimum degree
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
nlpos = .TRUE. ! Matrix is positive definite
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
CALL attach(fid, '/', 'EPSI', epsi)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
WRITE(*,'(a,i8)') 'nrank', nrank
!
t0 = seconds()
CALL init(nrank, nterms, mat, nlsym=.TRUE.)
CALL dismat(splxy, epsi, mat)
ALLOCATE(arr(nrank))
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
END IF
!
! BC on Matrix
!
WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
CALL ibcmat(mat, ny)
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
WRITE(*,'(a)') 'Last rows'
DO i=nrank-ny,nrank
CALL getrow(mat, i, arr)
WRITE(*,'(10(1pe12.3))') arr
END DO
END IF
tmat = seconds() - t0
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, epsi, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
!
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
!!$ CALL factor(mat)
!
! The call to "factor" could be split into the
! 3 following calls
!
CALL to_mat(mat)
tconv = seconds() -t0
WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
!
t0 = seconds()
CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
CALL putmat(fid, '/MAT', mat)
treord = seconds() - t0
!
t0 = seconds()
CALL numfact(mat, debug=.FALSE.)
tfact = seconds() - t0
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18)
WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19)
gflops1 = mat%p%iparm(19) / tfact / 1.d3
!
CALL bsolve(mat, rhs, sol, debug=.FALSE.)
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL bsolve(mat, rhs, sol)
sol(1:ny-1) = sol(ny)
END DO
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
tsolv = (seconds() - t0)/REAL(nits)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv
WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!===========================================================================
! 5.0 Clear the matrix and recompute
!
WRITE(*,'(/a)') 'Recompute the solver ...'
t0 = seconds()
CALL clear_mat(mat)
CALL dismat(splxy, epsi, mat)
CALL ibcmat(mat, ny)
tmat = seconds()-t0
!
t0 = seconds()
!!$ CALL numfact(mat, debug=.FALSE.)
CALL factor(mat, nlreord=.FALSE., debug=.FALSE.)
tfact = seconds()-t0
gflops1 = mat%p%iparm(19) / tfact / 1.d3
!
t0 = seconds()
ALLOCATE(newsol(nrank))
CALL bsolve(mat, rhs, newsol)
newsol(1:ny-1) = newsol(ny)
tsolv = seconds()-t0
!
WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!
DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_sym_pardiso_dft.f90 b/examples/pde2d_sym_pardiso_dft.f90
index 42426e6..3b77cf5 100644
--- a/examples/pde2d_sym_pardiso_dft.f90
+++ b/examples/pde2d_sym_pardiso_dft.f90
@@ -1,1034 +1,1034 @@
!>
!> @file pde2d_sym_pardiso_dft.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and PARDISO symmetric matrix.
! The periodic coordinate y is discrete Fourier transformed.
!
! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
! C(x,y) = 1 + \epsilon x cos(y)
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_sym_pardiso_dft_mod
USE bsplines
USE pardiso_bsplines
IMPLICIT NONE
!
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, epsi, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(in) :: epsi
TYPE(zpardiso_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: kmin, kmax, dk
INTEGER :: i, j, ig1, ig2, kc
INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), fft_temp(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER :: kcoupl ! Number of mode couplings
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
WRITE(*,'(a, 5i6)') 'kmin, kmax, dk =', kmin, kmax, dk
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
!
! Weak form
!
kterms = mat%nterms
kcoupl = SIZE(spl%sp2%dft%mode_couplings)
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
!
! Splines and derivatives at all Gauss points
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
ALLOCATE(fft_temp(0:n2-1)) ! Used in coefeq
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
!
! First interval in 2nd (periodic) coordinate
!
j = 1
CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
left2 = j
CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO mt=kmin,kmax ! Test Fourier mode
DO kc=1,kcoupl
mw = mt + spl%sp2%dft%mode_couplings(kc)
IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
!-------------
contrib = (0.0d0, 0.0d0)
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,kc,ig1,ig2) * &
& CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2) /REAL(n2,8)
END DO
END DO
END DO
irow = (igw1-1)*dk + (mw-kmin)+1 ! Number first mode m then radial coord.
jcol = (igt1-1)*dk + (mt-kmin)+1
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, ft_fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
DEALLOCATE(fft_temp)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
USE fft
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE COMPLEX, INTENT(out) :: c(:,:)
!
DOUBLE PRECISION :: zcoef, dy
INTEGER :: j, k, kc, kp
!
! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
! C(x,y) = 1 + epsilon*x*cos(y)
!
dy = spl%sp2%dft%dx
kc = SIZE(spl%sp2%dft%mode_couplings)
DO j=0,n2-1
fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
END DO
CALL fourcol(fft_temp,1)
DO k=1,kc
kp = spl%sp2%dft%mode_couplings(k)
IF(kp.LT.0) kp=kp+n2
c(1,k) = x*fft_temp(kp)
c(2,k) = fft_temp(kp)/x
END DO
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
!
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, epsi, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
DOUBLE PRECISION, INTENT(in) :: epsi
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x, y, m)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(in) :: m
!
DOUBLE PRECISION :: xm
xm = REAL(m,8)
rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
& epsi*x*( &
& ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
& (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
& ))
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, dft)
!
! Apply BC on matrix
!
TYPE(zpardiso_mat), INTENT(inout) :: mat
TYPE(dftmap), INTENT(in) :: dft
INTEGER :: nrank, k, kmin, kmax, dk, i
DOUBLE COMPLEX :: arr(mat%rank)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
kmin = dft%kmin
kmax = dft%kmax
dk = dft%dk
!===========================================================================
! 2.0 BC at the axis
!
! zero for non-zero modes
!
DO k=kmin,kmax
IF(k.NE.0) THEN
i = k-kmin+1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END IF
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO i = nrank, nrank-dk+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, dft)
!
! Apply BC on RHS
!
DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
TYPE(dftmap), INTENT(in) :: dft
INTEGER :: nrank, kmin, kmax, dk, k
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
kmin = dft%kmin
kmax = dft%kmax
dk = dft%dk
!===========================================================================
! 2.0 BC at the axis
!
! zero for non-zero modes
!
DO k=kmin,kmax
IF(k.NE.0) rhs(k-kmin+1) = 0.0
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-dk+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE spectrum0(spl, carr, xpt, cspec)
!
! DFT modes at xpt (integration on the first interval)
!
DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0)
DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
TYPE(spline2d), INTENT(in) :: spl
DOUBLE COMPLEX, INTENT(in) :: carr(:)
DOUBLE PRECISION, INTENT(in) :: xpt
DOUBLE COMPLEX, INTENT(out) :: cspec(:)
!
INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
INTEGER :: k, kmin, kmax, dk, kk
INTEGER :: ng2, ig2
INTEGER, ALLOCATABLE :: left2(:)
DOUBLE PRECISION :: temp(1)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
!
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(left2(ng2))
ALLOCATE(xg2(ng2), wg2(ng2))
ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
ALLOCATE(psi(kmin:kmax))
!
! Integration over first interval
!
left2 = 1
CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
psi = (0.0d0,0.0d0)
DO k=kmin,kmax
DO ig2=1,ng2
psi(k) = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
END DO
END DO
!
ALLOCATE(coefs(dk,ndim1))
coefs = RESHAPE(carr, SHAPE(coefs))
temp = xpt
DO kk=kmin,kmax
k=kk-kmin+1
coefs(k,:) = psi(kk)*coefs(k,:)
CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
END DO
cspec = cspec/(2.0d0*pi)
!
DEALLOCATE(left2)
DEALLOCATE(xg2, wg2)
DEALLOCATE(ft_fun2)
DEALLOCATE(psi)
DEALLOCATE(coefs)
END SUBROUTINE spectrum0
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
!
! DFT modes at xpt (at the initial ypt0)
!
DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0)
!
TYPE(spline2d), INTENT(in) :: spl
DOUBLE COMPLEX, INTENT(in) :: carr(:)
DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
DOUBLE COMPLEX, INTENT(out) :: cspec(:)
!
INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
INTEGER :: k, kmin, kmax, dk
DOUBLE PRECISION :: temp(1)
DOUBLE COMPLEX :: ctemp(1)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
!
! DFT of splines at ypt0
ALLOCATE(ft_fun2(kmin:kmax,1))
ALLOCATE(coefs(kmin:kmax,ndim1))
CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
coefs = RESHAPE(carr, SHAPE(coefs))
!
temp = xpt
DO k=kmin,kmax
CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
END DO
cspec = cspec/REAL(n2,8)
!
DEALLOCATE(ft_fun2)
DEALLOCATE(coefs)
END SUBROUTINE spectrum1
!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
!
! DFT modes at xpt (at the initial ypt0)
!
USE fft
USE bsplines
!
DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0)
!
TYPE(spline2d) :: spl
DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
DOUBLE COMPLEX, INTENT(out) :: cspec(:)
!
INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
INTEGER :: k, kmin, kmax, dk
DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:)
DOUBLE PRECISION :: temp(1)
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
!
ALLOCATE(ypt(0:n2-1))
ALLOCATE(fun(1, 0:n2-1))
ALLOCATE(ft_fun(0:n2-1))
!
! Function values at points ypt
!
ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
temp = xpt
CALL gridval(spl, temp, ypt, fun, (/0,0/))
ft_fun = fun(1,:)
!
! Discrete Fourier Transform
!
CALL fourcol(ft_fun, 1)
DO k=kmin,kmax
IF(k.LT.0) THEN
cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
ELSE
cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
END IF
END DO
cspec = cspec/REAL(n2,8)
!
DEALLOCATE(ypt)
DEALLOCATE(fun)
DEALLOCATE(ft_fun)
END SUBROUTINE spectrum2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_sym_pardiso_dft_mod
PROGRAM main
USE pde2d_sym_pardiso_dft_mod
USE futils
USE fft
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
INTEGER :: n_mode_couplings
INTEGER, ALLOCATABLE :: mode_couplings(:)
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
INTEGER :: k, kp, ik
DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE COMPLEX, ALLOCATABLE :: crhs(:), crhs_r(:), csol(:), csol_r(:)
TYPE(spline2d) :: splxy
TYPE(zpardiso_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_sym_pardiso_dft.h5'
INTEGER :: fid
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
INTEGER :: nits=100
LOGICAL :: nlmetis, nlforce_zero, nlpos
!
DOUBLE PRECISION :: xpt, ypt0
DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
DOUBLE COMPLEX :: energy, energy_exact
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
& nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
kmin = -3 ! Minimum Fourier mode number
kmax = 3 ! Maximum Fourier mode number
mbess = 2 ! Exponent of differential problem
epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlmetis = .FALSE. ! Use metis ordering or minimum degree
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
nlpos = .TRUE. ! Matrix is positive definite
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!!
! Read table of mode couplings
!
READ(*,*) n_mode_couplings
ALLOCATE(mode_couplings(n_mode_couplings))
READ(*,*) mode_couplings
WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Exact energy
!
energy_exact = 2.0d0*pi/REAL(2+mbess,8)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
CALL attach(fid, '/', 'EPSI', epsi)
CALL attach(fid, '/', 'KMIN', kmin)
CALL attach(fid, '/', 'KMAX', kmax)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
& (/.FALSE., .TRUE./))
!
! Init DFT for spline in 2nd direction
!
CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
dk = splxy%sp2%dft%dk
!
! FE matrix assembly
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
nrank = (nx+nidbas(1))*dk ! Rank of restricted matrix
nrank_full = (nx+nidbas(1))*ny ! Rank of full matrix
!
ALLOCATE(rhs(nrank_full), sol(nrank_full))
ALLOCATE(crhs(nrank_full), csol(nrank_full))
ALLOCATE(crhs_r(nrank), csol_r(nrank))
!
WRITE(*,'(a,i8)') 'nrank_full', nrank_full
WRITE(*,'(a,i8)') 'nrank ', nrank
!
t0 = seconds()
CALL init(nrank, nterms, mat, nlherm=.TRUE.)
CALL dismat(splxy, epsi, mat)
ALLOCATE(arr(nrank))
ALLOCATE(srow(nrank))
DO i=1,nrank
CALL getrow(mat, i, arr)
srow(i) = SUM(arr)
END DO
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
PRINT*, 'Sum of mat before BC', SUM(srow)
!
! BC on Matrix
!
WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
CALL ibcmat(mat, splxy%sp2%dft)
tmat = seconds() - t0
DO i=1,nrank
CALL getrow(mat, i, arr)
srow(i) = SUM(arr)
END DO
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
PRINT*, 'Sum of mat after BC', SUM(srow)
!
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!
! RHS assembly
!
CALL disrhs(mbess, epsi, splxy, rhs)
!
! Init FFT
!
t0 = seconds()
CALL fourcol(crhs(1:ny),1)
CALL fourcol(crhs(1:ny),-1)
tfour0 = seconds()-t0
crhs = crhs/REAL(ny,8)
!
! DFT of RHS
!
t0 = seconds()
crhs = rhs
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
CALL fourcol(crhs(i0+1:i0+ny), 1)
END DO
tfour = seconds()-t0
!
! Restriction in Fourier space
! k = kmin:kmax (restricted)
! kp = 0:ny-1 (full)
!
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
i0_r = (i-1)*dk
DO k=kmin,kmax
kp = k
IF(kp.LT.0) kp = kp+ny
crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
END DO
END DO
!
! BC on RHS
!
CALL ibcrhs(crhs_r, splxy%sp2%dft)
!
IF(nrank.LT.100) THEN
WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
END IF
!===========================================================================
! 3.0 Solve the dicretized system
!
! Matrix factorization
!
t0 = seconds()
!!$ CALL factor(mat, nlmetis=nlmetis)
CALL to_mat(mat)
CALL reord_mat(mat, nlmetis=nlmetis); CALL putmat(fid, '/MAT1', mat)
CALL numfact(mat)
tfact = seconds() - t0
DO i=1,nrank
CALL getrow(mat, i, arr)
srow(i) = SUM(arr)
END DO
PRINT*, 'Sum of mat after factor', SUM(srow)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18)
WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19)
gflops1 = mat%p%iparm(19) / tfact / 1.d3
!
! Backsolve
!
t0 = seconds()
PRINT*, 'SUM of crhs_r', SUM(crhs_r)
CALL bsolve(mat, crhs_r, csol_r, debug=.FALSE.)
WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
tsolv = seconds() - t0
PRINT*, 'SUM of csol_r', SUM(csol_r)
!
CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
!===========================================================================
! 4.0 Perform some diagnostics in Fourier space
!
! Fourier spectrum at xpt
!
xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
ALLOCATE(cspec0(dk))
CALL spectrum0(splxy, csol_r, xpt, cspec0)
WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
END DO
!
ypt0 = 0.0d0
WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
END DO
ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval
WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
END DO
!
! Spectral energy
!
WRITE(*,'(/a)') 'Spectral energies'
ALLOCATE(energy_k(kmin:kmax))
energy_k = (0.0d0,0.0d0)
DO i=1,dimx
i0_r = (i-1)*dk
DO k=kmin,kmax
ik = i0_r+k-kmin+1
energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
END DO
END DO
energy_k = energy_k/REAL(ny,8)
energy = SUM(energy_k)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
END DO
WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
!
CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
!===========================================================================
! 5.0 Transform back to real space
!
! Expand to full Fourier space
! k = kmin:kmax (restricted)
! kp = 0:ny-1 (full)
!
crhs = (0.0d0,0.0d0)
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
i0_r = (i-1)*dk
DO k=kmin,kmax
kp = k
IF(kp.LT.0) kp = kp+ny
csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
END DO
END DO
!
! Fourier transform back to real space
!
t0 = seconds()
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
CALL fourcol(csol(i0+1:i0+ny),-1)
END DO
sol = REAL(csol)/REAL(ny,8)
tfour = tfour + seconds()-t0
!
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!
! Total energy
!
WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
& DOT_PRODUCT(rhs,sol), &
& DOT_PRODUCT(rhs,sol)-REAL(energy_exact)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
!===========================================================================
! 6.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
!
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
! Fourier spectrum at xpt
!
xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
ALLOCATE(cspec(dk))
!
ypt0 = 0.0d0
WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
CALL spectrum2(splxy, xpt, ypt0, cspec)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
END DO
!
ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval
WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
CALL spectrum2(splxy, xpt, ypt0, cspec)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
END DO
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Init FFT time (s) ', tfour0
WRITE(*,'(a,1pe12.3)') 'FFT time (s) ', tfour
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(cspec0, cspec)
DEALLOCATE(mode_couplings)
DEALLOCATE(xgrid, ygrid, rhs, sol)
DEALLOCATE(crhs, csol)
DEALLOCATE(crhs_r, csol_r)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
DEALLOCATE(srow)
DEALLOCATE(energy_k)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
!
FUNCTION cnorm2(x)
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: cnorm2
cnorm2 = SQRT(DOT_PRODUCT(x,x))
END FUNCTION cnorm2
!
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_sym_wsmp.f90 b/examples/pde2d_sym_wsmp.f90
index 2d0c56e..8d44461 100644
--- a/examples/pde2d_sym_wsmp.f90
+++ b/examples/pde2d_sym_wsmp.f90
@@ -1,696 +1,696 @@
!>
!> @file pde2d_sym_wsmp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and WSMP symmetric matrix
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_sym_wsmp_mod
USE bsplines
USE wsmp_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(wsmp_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(wsmp_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
CALL putrow(mat, ny, zsum)
!
! The away operator
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_sym_wsmp_mod
PROGRAM main
USE pde2d_sym_wsmp_mod
USE futils
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
TYPE(spline2d) :: splxy
TYPE(wsmp_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_sym_wsmp.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
DOUBLE PRECISION :: tconv, treord
INTEGER :: nits=100
LOGICAL :: nlforce_zero, nlpos
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlpos, &
& nlforce_zero, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
nlpos = .TRUE. ! Matrix is positive definite
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
WRITE(*,'(a,i8)') 'nrank', nrank
!
CALL init(nrank, nterms, mat, nlsym=.TRUE., nlpos=nlpos)
CALL dismat(splxy, mat)
ALLOCATE(arr(nrank))
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
END IF
!
! BC on Matrix
!
WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
CALL ibcmat(mat, ny)
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
WRITE(*,'(a)') 'Last rows'
DO i=nrank-ny,nrank
CALL getrow(mat, i, arr)
WRITE(*,'(10(1pe12.3))') arr
END DO
END IF
tmat = seconds() - t0
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
!
! The call to "factor" could be split into the
! 3 following calls
!
!!$ CALL to_mat(mat)
!!$ tconv = seconds() -t0
!!$ WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
!!$ WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
!!$!
!!$ t0 = seconds()
!!$ CALL reord_mat(mat)
!!$ CALL putmat(fid, '/MAT', mat)
!!$ treord = seconds() - t0
!!$!
!!$ t0 = seconds()
!!$ CALL numfact(mat)
tfact = seconds() - t0
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
CALL bsolve(mat, rhs, sol)
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL bsolve(mat, rhs, sol)
sol(1:ny-1) = sol(ny)
END DO
WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(6)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
tsolv = (seconds() - t0)/REAL(nits)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv
WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!===========================================================================
! 5.0 Clear the matrix and recompute
!
WRITE(*,'(/a)') 'Recompute the solver ...'
t0 = seconds()
CALL clear_mat(mat)
CALL dismat(splxy, mat)
CALL ibcmat(mat, ny)
tmat = seconds()-t0
!
t0 = seconds()
CALL numfact(mat)
tfact = seconds()-t0
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
t0 = seconds()
ALLOCATE(newsol(nrank))
CALL bsolve(mat, rhs, newsol)
newsol(1:ny-1) = newsol(ny)
tsolv = seconds()-t0
!
WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!
DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_sym_wsmp_dft.f90 b/examples/pde2d_sym_wsmp_dft.f90
index f3226e9..01625ff 100644
--- a/examples/pde2d_sym_wsmp_dft.f90
+++ b/examples/pde2d_sym_wsmp_dft.f90
@@ -1,1039 +1,1039 @@
!>
!> @file pde2d_sym_wsmp_dft.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Ben McMillan
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and WSMP symmetric matrix.
! The periodic coordinate y is discrete Fourier transformed.
!
! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
! C(x,y) = 1 + \epsilon x cos(y)
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_sym_wsmp_dft_mod
USE bsplines
USE wsmp_bsplines
IMPLICIT NONE
!
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, epsi, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(in) :: epsi
TYPE(zwsmp_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: kmin, kmax, dk
INTEGER :: i, j, ig1, ig2, kc
INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), fft_temp(:)
DOUBLE COMPLEX :: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER :: kcoupl ! Number of mode couplings
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
WRITE(*,'(a, 5i6)') 'kmin, kmax, dk =', kmin, kmax, dk
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
!
! Weak form
!
kterms = mat%nterms
kcoupl = SIZE(spl%sp2%dft%mode_couplings)
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
!
! Splines and derivatives at all Gauss points
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
ALLOCATE(fft_temp(0:n2-1)) ! Used in coefeq
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
!
! First interval in 2nd (periodic) coordinate
!
j = 1
CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
left2 = j
CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO mt=kmin,kmax ! Test Fourier mode
DO kc=1,kcoupl
mw = mt + spl%sp2%dft%mode_couplings(kc)
IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
!-------------
contrib = (0.0d0, 0.0d0)
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,kc,ig1,ig2) * &
& CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2) /REAL(n2,8)
END DO
END DO
END DO
irow = (igw1-1)*dk + (mw-kmin)+1 ! Number first mode m then radial coord.
jcol = (igt1-1)*dk + (mt-kmin)+1
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, ft_fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
DEALLOCATE(fft_temp)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
USE fft
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE COMPLEX, INTENT(out) :: c(:,:)
!
DOUBLE PRECISION :: zcoef, dy
INTEGER :: j, k, kc, kp
!
! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
! C(x,y) = 1 + epsilon*x*cos(y)
!
dy = spl%sp2%dft%dx
kc = SIZE(spl%sp2%dft%mode_couplings)
DO j=0,n2-1
fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
END DO
CALL fourcol(fft_temp,1)
DO k=1,kc
kp = spl%sp2%dft%mode_couplings(k)
IF(kp.LT.0) kp=kp+n2
c(1,k) = x*fft_temp(kp)
c(2,k) = fft_temp(kp)/x
END DO
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
!
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, epsi, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
DOUBLE PRECISION, INTENT(in) :: epsi
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x, y, m)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(in) :: m
!
DOUBLE PRECISION :: xm
xm = REAL(m,8)
rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
& epsi*x*( &
& ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
& (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
& ))
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, dft)
!
! Apply BC on matrix
!
TYPE(zwsmp_mat), INTENT(inout) :: mat
TYPE(dftmap), INTENT(in) :: dft
INTEGER :: nrank, k, kmin, kmax, dk, i
DOUBLE COMPLEX :: arr(mat%rank)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
kmin = dft%kmin
kmax = dft%kmax
dk = dft%dk
!===========================================================================
! 2.0 BC at the axis
!
! zero for non-zero modes
!
DO k=kmin,kmax
IF(k.NE.0) THEN
i = k-kmin+1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END IF
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO i = nrank, nrank-dk+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, dft)
!
! Apply BC on RHS
!
DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
TYPE(dftmap), INTENT(in) :: dft
INTEGER :: nrank, kmin, kmax, dk, k
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
kmin = dft%kmin
kmax = dft%kmax
dk = dft%dk
!===========================================================================
! 2.0 BC at the axis
!
! zero for non-zero modes
!
DO k=kmin,kmax
IF(k.NE.0) rhs(k-kmin+1) = 0.0
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-dk+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE spectrum0(spl, carr, xpt, cspec)
!
! DFT modes at xpt (integration on the first interval)
!
DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0)
DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
TYPE(spline2d), INTENT(in) :: spl
DOUBLE COMPLEX, INTENT(in) :: carr(:)
DOUBLE PRECISION, INTENT(in) :: xpt
DOUBLE COMPLEX, INTENT(out) :: cspec(:)
!
INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
INTEGER :: k, kmin, kmax, dk, kk
INTEGER :: ng2, ig2
INTEGER, ALLOCATABLE :: left2(:)
DOUBLE PRECISION :: temp(1)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
!
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(left2(ng2))
ALLOCATE(xg2(ng2), wg2(ng2))
ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
ALLOCATE(psi(kmin:kmax))
!
! Integration over first interval
!
left2 = 1
CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
psi = (0.0d0,0.0d0)
DO k=kmin,kmax
DO ig2=1,ng2
psi(k) = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
END DO
END DO
!
ALLOCATE(coefs(dk,ndim1))
coefs = RESHAPE(carr, SHAPE(coefs))
temp = xpt
DO kk=kmin,kmax
k=kk-kmin+1
coefs(k,:) = psi(kk)*coefs(k,:)
CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
END DO
cspec = cspec/(2.0d0*pi)
!
DEALLOCATE(left2)
DEALLOCATE(xg2, wg2)
DEALLOCATE(ft_fun2)
DEALLOCATE(psi)
DEALLOCATE(coefs)
END SUBROUTINE spectrum0
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
!
! DFT modes at xpt (at the initial ypt0)
!
DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0)
!
TYPE(spline2d), INTENT(in) :: spl
DOUBLE COMPLEX, INTENT(in) :: carr(:)
DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
DOUBLE COMPLEX, INTENT(out) :: cspec(:)
!
INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
INTEGER :: k, kmin, kmax, dk
DOUBLE PRECISION :: temp(1)
DOUBLE COMPLEX :: ctemp(1)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
!
! DFT of splines at ypt0
ALLOCATE(ft_fun2(kmin:kmax,1))
ALLOCATE(coefs(kmin:kmax,ndim1))
CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
coefs = RESHAPE(carr, SHAPE(coefs))
!
temp = xpt
DO k=kmin,kmax
CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
END DO
cspec = cspec/REAL(n2,8)
!
DEALLOCATE(ft_fun2)
DEALLOCATE(coefs)
END SUBROUTINE spectrum1
!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
!
! DFT modes at xpt (at the initial ypt0)
!
USE fft
USE bsplines
!
DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0)
!
TYPE(spline2d) :: spl
DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
DOUBLE COMPLEX, INTENT(out) :: cspec(:)
!
INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
INTEGER :: k, kmin, kmax, dk
DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:)
DOUBLE PRECISION :: temp(1)
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
kmin = spl%sp2%dft%kmin
kmax = spl%sp2%dft%kmax
dk = spl%sp2%dft%dk
!
ALLOCATE(ypt(0:n2-1))
ALLOCATE(fun(1, 0:n2-1))
ALLOCATE(ft_fun(0:n2-1))
!
! Function values at points ypt
!
ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
temp = xpt
CALL gridval(spl, temp, ypt, fun, (/0,0/))
ft_fun = fun(1,:)
!
! Discrete Fourier Transform
!
CALL fourcol(ft_fun, 1)
DO k=kmin,kmax
IF(k.LT.0) THEN
cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
ELSE
cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
END IF
END DO
cspec = cspec/REAL(n2,8)
!
DEALLOCATE(ypt)
DEALLOCATE(fun)
DEALLOCATE(ft_fun)
END SUBROUTINE spectrum2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_sym_wsmp_dft_mod
PROGRAM main
USE pde2d_sym_wsmp_dft_mod
USE futils
USE fft
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
INTEGER :: n_mode_couplings
INTEGER, ALLOCATABLE :: mode_couplings(:)
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
INTEGER :: k, kp, ik
DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE COMPLEX, ALLOCATABLE :: crhs(:), crhs_r(:), csol(:), csol_r(:)
TYPE(spline2d) :: splxy
TYPE(zwsmp_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_sym_wsmp_dft.h5'
INTEGER :: fid
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
INTEGER :: nits=100
LOGICAL :: nlmetis, nlforce_zero, nlpos
!
DOUBLE PRECISION :: xpt, ypt0
DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
DOUBLE COMPLEX :: energy, energy_exact
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
& nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
kmin = -3 ! Minimum Fourier mode number
kmax = 3 ! Maximum Fourier mode number
mbess = 2 ! Exponent of differential problem
epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlmetis = .FALSE. ! Use metis ordering or minimum degree
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
nlpos = .TRUE. ! Matrix is positive definite
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!!
! Read table of mode couplings
!
READ(*,*) n_mode_couplings
ALLOCATE(mode_couplings(n_mode_couplings))
READ(*,*) mode_couplings
WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Exact energy
!
energy_exact = 2.0d0*pi/REAL(2+mbess,8)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
CALL attach(fid, '/', 'EPSI', epsi)
CALL attach(fid, '/', 'KMIN', kmin)
CALL attach(fid, '/', 'KMAX', kmax)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
& (/.FALSE., .TRUE./))
!
! Init DFT for spline in 2nd direction
!
CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
dk = splxy%sp2%dft%dk
!
! FE matrix assembly
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
nrank = (nx+nidbas(1))*dk ! Rank of restricted matrix
nrank_full = (nx+nidbas(1))*ny ! Rank of full matrix
!
ALLOCATE(rhs(nrank_full), sol(nrank_full))
ALLOCATE(crhs(nrank_full), csol(nrank_full))
ALLOCATE(crhs_r(nrank), csol_r(nrank))
!
WRITE(*,'(a,i8)') 'nrank_full', nrank_full
WRITE(*,'(a,i8)') 'nrank ', nrank
!
t0 = seconds()
CALL init(nrank, nterms, mat, nlherm=.TRUE., nlpos=nlpos)
CALL dismat(splxy, epsi, mat)
ALLOCATE(arr(nrank))
ALLOCATE(srow(nrank))
DO i=1,nrank
CALL getrow(mat, i, arr)
srow(i) = SUM(arr)
END DO
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
PRINT*, 'Sum of mat before BC', SUM(srow)
!
! BC on Matrix
!
WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
CALL ibcmat(mat, splxy%sp2%dft)
tmat = seconds() - t0
DO i=1,nrank
CALL getrow(mat, i, arr)
srow(i) = SUM(arr)
END DO
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
PRINT*, 'Sum of mat after BC', SUM(srow)
!
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!
! RHS assembly
!
CALL disrhs(mbess, epsi, splxy, rhs)
!
! Init FFT
!
t0 = seconds()
CALL fourcol(crhs(1:ny),1)
CALL fourcol(crhs(1:ny),-1)
tfour0 = seconds()-t0
crhs = crhs/REAL(ny,8)
!
! DFT of RHS
!
t0 = seconds()
crhs = rhs
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
CALL fourcol(crhs(i0+1:i0+ny), 1)
END DO
tfour = seconds()-t0
!
! Restriction in Fourier space
! k = kmin:kmax (restricted)
! kp = 0:ny-1 (full)
!
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
i0_r = (i-1)*dk
DO k=kmin,kmax
kp = k
IF(kp.LT.0) kp = kp+ny
crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
END DO
END DO
!
! BC on RHS
!
CALL ibcrhs(crhs_r, splxy%sp2%dft)
!
IF(nrank.LT.100) THEN
WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
END IF
!===========================================================================
! 3.0 Solve the dicretized system
!
! Matrix factorization
!
t0 = seconds()
!!$ CALL factor(mat)
CALL to_mat(mat)
CALL reord_mat(mat); CALL putmat(fid, '/MAT1', mat)
CALL numfact(mat)
tfact = seconds() - t0
DO i=1,nrank
CALL getrow(mat, i, arr)
srow(i) = SUM(arr)
END DO
PRINT*, 'Sum of mat after factor', SUM(srow)
PRINT*, 'iparm(64) after factor', mat%p%iparm(64)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
! Backsolve
!
t0 = seconds()
PRINT*, 'SUM of crhs_r', SUM(crhs_r)
CALL bsolve(mat, crhs_r, csol_r)
tsolv = seconds() - t0
WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
PRINT*, 'SUM of csol_r', SUM(csol_r)
PRINT*, 'iparm(64) after bsolve', mat%p%iparm(64)
PRINT*, 'Residue from WSMP', mat%p%dparm(7)
WRITE(*,'(a/(20i4))') 'iparm', mat%p%iparm
!
CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
!===========================================================================
! 4.0 Perform some diagnostics in Fourier space
!
! Fourier spectrum at xpt
!
xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
ALLOCATE(cspec0(dk))
CALL spectrum0(splxy, csol_r, xpt, cspec0)
WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
END DO
!
ypt0 = 0.0d0
WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
END DO
ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval
WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
END DO
!
! Spectral energy
!
WRITE(*,'(/a)') 'Spectral energies'
ALLOCATE(energy_k(kmin:kmax))
energy_k = (0.0d0,0.0d0)
DO i=1,dimx
i0_r = (i-1)*dk
DO k=kmin,kmax
ik = i0_r+k-kmin+1
energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
END DO
END DO
energy_k = energy_k/REAL(ny,8)
energy = SUM(energy_k)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
END DO
WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
!
CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
!===========================================================================
! 5.0 Transform back to real space
!
! Expand to full Fourier space
! k = kmin:kmax (restricted)
! kp = 0:ny-1 (full)
!
crhs = (0.0d0,0.0d0)
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
i0_r = (i-1)*dk
DO k=kmin,kmax
kp = k
IF(kp.LT.0) kp = kp+ny
csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
END DO
END DO
!
! Fourier transform back to real space
!
t0 = seconds()
DO i=1,nx+nidbas(1)
i0 = (i-1)*ny
CALL fourcol(csol(i0+1:i0+ny),-1)
END DO
sol = REAL(csol)/REAL(ny,8)
tfour = tfour + seconds()-t0
!
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!
! Total energy
!
WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
& DOT_PRODUCT(rhs,sol), &
& DOT_PRODUCT(rhs,sol)-REAL(energy_exact)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
!===========================================================================
! 6.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
!
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
! Fourier spectrum at xpt
!
xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
ALLOCATE(cspec(dk))
!
ypt0 = 0.0d0
WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
CALL spectrum2(splxy, xpt, ypt0, cspec)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
END DO
!
ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval
WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
CALL spectrum2(splxy, xpt, ypt0, cspec)
DO k=kmin,kmax
WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
END DO
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Init FFT time (s) ', tfour0
WRITE(*,'(a,1pe12.3)') 'FFT time (s) ', tfour
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(cspec0, cspec)
DEALLOCATE(mode_couplings)
DEALLOCATE(xgrid, ygrid, rhs, sol)
DEALLOCATE(crhs, csol)
DEALLOCATE(crhs_r, csol_r)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
DEALLOCATE(srow)
DEALLOCATE(energy_k)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
!
FUNCTION cnorm2(x)
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: cnorm2
cnorm2 = SQRT(DOT_PRODUCT(x,x))
END FUNCTION cnorm2
!
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde2d_wsmp.f90 b/examples/pde2d_wsmp.f90
index 6a20630..f227e92 100644
--- a/examples/pde2d_wsmp.f90
+++ b/examples/pde2d_wsmp.f90
@@ -1,711 +1,711 @@
!>
!> @file pde2d_wsmp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Solving the 2d PDE using splines and WSMP non-symmetric matrix
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_wsmp_mod
USE bsplines
USE wsmp_bsplines
IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(wsmp_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order
DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form
INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
ALLOCATE(coefs(kterms,ng1,ng2))
!
ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
!===========================================================================
! 2.0 Assembly loop
!
ALLOCATE(left1(ng1))
ALLOCATE(left2(ng2))
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
left1 = i
CALL basfun(xg1, spl%sp1, fun1, left1)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
left2 = j
CALL basfun(xg2, spl%sp2, fun2, left2)
!
DO ig1=1,ng1
DO ig2=1,ng2
CALL coefeq(xg1(ig1), xg2(ig2), &
& idert(:,:,ig1,ig2), &
& iderw(:,:,ig1,ig2), &
& coefs(:,ig1,ig2))
END DO
END DO
!
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
!-------------
contrib = 0.0d0
DO ig1=1,ng1
DO ig2=1,ng2
DO iterm=1,kterms
contrib = contrib + &
& fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
& fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
& coefs(iterm,ig1,ig2) * &
& fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
& fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
& wg1(ig1) * wg2(ig2)
END DO
END DO
END DO
CALL updtmat(mat, irow, jcol, contrib)
!-------------
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
DEALLOCATE(left1,left2)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(wsmp_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
zsum(:) = zsum(:) + arr(:)
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
zsum(ny:) = zsum(ny:) + arr(ny:)
END DO
CALL putcol(mat, ny, zsum)
!
! The away operator
!
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_wsmp_mod
PROGRAM main
USE pde2d_wsmp_mod
USE futils
!
IMPLICIT NONE
INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
LOGICAL :: nlppform
INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
TYPE(spline2d) :: splxy
TYPE(wsmp_mat) :: mat
!
CHARACTER(len=128) :: file='pde2d_wsmp.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
DOUBLE PRECISION :: tconv, treord
INTEGER :: nits=100
LOGICAL :: nlforce_zero
LOGICAL :: nlserial
!
NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
& nlforce_zero, nlserial, coefx, coefy
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny))
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi
CALL meshdist(coefy, ygrid, ny)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
! FE matrix assembly
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
WRITE(*,'(a,i8)') 'nrank', nrank
!
CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
CALL dismat(splxy, mat)
ALLOCATE(arr(nrank))
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
END IF
!
! BC on Matrix
!
WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
CALL ibcmat(mat, ny)
IF(nrank.LT.100) THEN
DO i=1,nrank
CALL getele(mat, i, i, arr(i))
END DO
WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
WRITE(*,'(a)') 'Last rows'
DO i=nrank-ny,nrank
CALL getrow(mat, i, arr)
WRITE(*,'(10(1pe12.3))') arr
END DO
END IF
tmat = seconds() - t0
!
! RHS assembly
!
ALLOCATE(rhs(nrank), sol(nrank))
CALL disrhs(mbess, splxy, rhs)
!
! BC on RHS
!
CALL ibcrhs(rhs, ny)
!
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL to_mat(mat)
tconv = seconds() -t0
WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
!
t0 = seconds()
CALL reord_mat(mat)
CALL putmat(fid, '/MAT', mat)
treord = seconds() - t0
!
t0 = seconds()
CALL numfact(mat)
tfact = seconds() - t0
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
CALL bsolve(mat, rhs, sol)
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL bsolve(mat, rhs, sol)
sol(1:ny-1) = sol(ny)
END DO
WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26)
WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
tsolv = (seconds() - t0)/REAL(nits)
!
! Spline coefficients, taking into account of periodicity in y
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
bcoef(i,j) = sol(ij)
END DO
END DO
WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
DO i=0,nx
DO j=0,ny
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
jder = (/0,0/)
!
! Compute PPFORM/BCOEFS at first call to gridval
CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
WRITE(*,'(/a)') '*** Checking solutions'
t0 = seconds()
DO it=1,nits ! nits iterations for timing
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
END DO
tgrid = (seconds() - t0)/REAL(nits)
errsol = solana - solcal
IF( SIZE(bcoef,2) .LE. 10 ) THEN
CALL prnmat('BCOEF', bcoef)
CALL prnmat('SOLANA', solana)
CALL prnmat('SOLCAL', solcal)
CALL prnmat('ERRSOL', errsol)
END IF
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
! Check derivatives d/dx and d/dy
!
WRITE(*,'(/a)') '*** Checking gradient'
DO i=0,nx
DO j=0,ny
IF( mbess .EQ. 0 ) THEN
solana(i,j) = -2.0d0 * xgrid(i)
ELSE
solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
END IF
END DO
END DO
!
jder = (/1,0/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol)
!
DO i=0,nx
DO j=0,ny
solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
END DO
END DO
!
jder = (/0,1/)
CALL gridval(splxy, xgrid, ygrid, solcal, jder)
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol)
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv
WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!===========================================================================
! 5.0 Clear the matrix and recompute
!
WRITE(*,'(/a)') 'Recompute the solver ...'
t0 = seconds()
CALL clear_mat(mat)
CALL dismat(splxy, mat)
CALL ibcmat(mat, ny)
tmat = seconds()-t0
!
t0 = seconds()
!!$ CALL numfact(mat)
CALL factor(mat, nlreord=.FALSE.)
tfact = seconds()-t0
gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
t0 = seconds()
ALLOCATE(newsol(nrank))
CALL bsolve(mat, rhs, newsol)
newsol(1:ny-1) = newsol(ny)
tsolv = seconds()-t0
!
WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv
WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1
!
DEALLOCATE(newsol)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xgrid, rhs, sol)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(bcoef)
DEALLOCATE(arr)
CALL destroy_sp(splxy)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
IMPLICIT NONE
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!+++
diff --git a/examples/pde3d.f90 b/examples/pde3d.f90
index 9fed6e9..b60e046 100644
--- a/examples/pde3d.f90
+++ b/examples/pde3d.f90
@@ -1,396 +1,396 @@
!>
!> @file pde3d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 3d PDE using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
!
USE futils
USE fft
USE pde3d_mod
!
IMPLICIT NONE
INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
LOGICAL :: nlppform
INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
INTEGER :: jder(3), it
DOUBLE PRECISION :: pi, coefx(5)
DOUBLE PRECISION :: dy, dz
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol
DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs
!
TYPE(spline2d1d), TARGET :: splxyz
TYPE(spline2d), POINTER :: splxy
TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='pde3d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
INTEGER :: nits=500
!
INTEGER, PARAMETER :: npart=10
DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
!
INTEGER :: kmin, kmax
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted
!
NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
!===========================================================================
! 1.0 Prologue
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nz = 8 ! Number of intervals in z
nidbas = (/3,3,3/) ! Degree of splines
ngauss = (/4,4, 4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
npow = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
!
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
!
dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y
ygrid = (/ (j*dy, j=0,ny) /)
!
dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z
zgrid = (/ (k*dz, k=0,nz) /)
!
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NZ', nz)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, &
& (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
splxy => splxyz%sp12
!
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots
!
! 2D FE matrix assembly (in plane x-y)
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
!
CALL init(kl, ku, nrank, nterms, mat)
CALL dismat(splxy, mat)
!!$ CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
ALLOCATE(arr(nrank))
!
! BC on Matrix
!
CALL ibcmat(mat, ny)
tmat = seconds() - t0
!
! 3D RHS assembly
!
ALLOCATE(rhs(nrank,0:nz-1), sol(nrank,0:nz-1))
CALL disrhs3(mbess, npow, splxyz, rhs)
!
! FFT in z of RHS
!
ALLOCATE(crhs(nrank,0:nz-1))
crhs = rhs
CALL fourrow(crhs, 1)
crhs = crhs/REAL(nz,8)
!
! Apply Mass matrix to crhs
!
kmin =-nz/2
kmax = nz/2-1
CALL init_dft(splxyz%sp3, kmin, kmax)
ALLOCATE(fftmass_shifted(kmin:kmax))
ALLOCATE(fftmass(0:nz-1))
CALL calc_fftmass(splxyz%sp3, fftmass_shifted)
DO k=kmin,kmax
fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k)
END DO
DO k=0,nz-1
crhs(:,k) = crhs(:,k)/fftmass(k)
END DO
!
! Fourier transform back crhs to real space in z
!
CALL fourrow(crhs, -1)
rhs(:,:) = REAL(crhs(:,:),8)
!
! BC on RHS
!
CALL ibcrhs3(rhs, ny)
!
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, rhs, sol)
!
! Backtransform of solution
!
DO k=0,nz-1
sol(1:ny-1,k) = sol(ny,k)
END DO
tsolv = seconds() - t0
gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!
! Spline coefficients, taking into account of periodicity in y and z
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
dimz = splxyz%sp3%dim
WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
DO k=0,dimz-1
kk = MODULO(k,nz)
bcoef(i,j,k) = sol(ij,kk)
END DO
END DO
END DO
CALL putarr(fid, '/BCOEF', bcoef, 'Spline coefficients of solution')
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
WRITE(*,'(/a)') 'Check with analytical solutions ...'
CALL RANDOM_NUMBER(xp)
yp=0.0d0
zp=0.0d0
jder = (/0,0,0/)
CALL gridval(splxyz, xp, yp, zp, fp_calc, jder, bcoef)
!!$ WRITE(*,'(4a12)') 'X', 'CALC', 'ANAL', 'ERROR'
!!$ DO i=1,npart
!!$ fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
!!$ & * COS(mbess*yp(i)) * COS(zp(i))**npow
!!$ WRITE(*,'(4(1pe12.3))') xp(i), fp_calc(i), fp_anal(i), fp_calc(i)-fp_anal(i)
!!$ END DO
!
ALLOCATE(solcal(0:nx,0:ny,0:nz))
ALLOCATE(solana(0:nx,0:ny,0:nz))
ALLOCATE(errsol(0:nx,0:ny,0:nz))
DO i=0,nx
DO j=0,ny
DO k=0,nz
solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
& * COS(mbess*ygrid(j)) * COS(zgrid(k))**npow
END DO
END DO
END DO
!
jder = (/0,0,0/)
CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
tgrid = seconds()-t0
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& norm2(errsol) / norm2(solana)
!
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/zgrid', zgrid, '\phi')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
!
! Check derivative d/dx
!
DO i=0,nx
DO j=0,ny
DO k=0,nz
IF( mbess .EQ. 0 ) THEN
solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k))**npow
ELSE
solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
& COS(zgrid(k))**npow
END IF
END DO
END DO
END DO
!
jder = (/1,0,0/)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
CALL putarr(fid, '/derivx_exact', solana)
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', norm2(errsol)/norm2(solana)
!
! Check derivative d/dy
!
DO i=0,nx
DO j=0,ny
DO k=0,nz
solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
& SIN(mbess*ygrid(j))* COS(zgrid(k))**npow
END DO
END DO
END DO
!
jder = (/0,1,0/)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
tgrid = tgrid + seconds()-t0
CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
CALL putarr(fid, '/derivy_exact', solana)
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', norm2(errsol)/norm2(solana)
!
! Check derivative d/dz
!
DO i=0,nx
DO j=0,ny
DO k=0,nz
solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
& * COS(mbess*ygrid(j)) * COS(zgrid(k))**(npow-1) &
& * SIN(zgrid(k))
END DO
END DO
END DO
!
jder = (/0,0,1/)
t0 = seconds()
IF(nlppform) THEN
CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
ELSE
CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef)
END IF
tgrid = tgrid + seconds()-t0
CALL putarr(fid, '/derivz', solcal, 'd/dz of solutions')
CALL putarr(fid, '/derivz_exact', solana)
errsol = solana - solcal
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', norm2(errsol)/norm2(solana)
!===========================================================================
! 9.0 Epilogue
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!
DEALLOCATE(xgrid, ygrid, zgrid)
DEALLOCATE(rhs, sol)
DEALLOCATE(crhs)
DEALLOCATE(fftmass)
DEALLOCATE(fftmass_shifted)
DEALLOCATE(bcoef)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(arr)
CALL destroy_sp(splxyz)
CALL destroy(mat)
!
CALL closef(fid)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
DO k=1,SIZE(x,3)
sum2 = sum2 + x(i,j,k)**2
END DO
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
diff --git a/examples/pde3d_mod.f90 b/examples/pde3d_mod.f90
index 79c3edf..20de074 100644
--- a/examples/pde3d_mod.f90
+++ b/examples/pde3d_mod.f90
@@ -1,397 +1,397 @@
!>
!> @file pde3d_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE pde3d_mod
USE bsplines
USE matrix
IMPLICIT NONE
!
CONTAINS
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
!
ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1)) !
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
DO iterm=1,kterms
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
contrib = fun1(iw1,iderw(iterm,1)) * &
& fun2(iw2,iderw(iterm,2)) * &
& coefs(iterm) * &
& fun2(it2,idert(iterm,2)) * &
& fun1(it1,idert(iterm,1)) * &
& wg1(ig1) * wg2(ig2)
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs3(mbess, npow, spl, rhs)
!
! Assembly the RHS using 3d spline spl
!
INTEGER, INTENT(in) :: mbess, npow
TYPE(spline2d1d), TARGET :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
!
TYPE(spline1d), POINTER :: sp1, sp2, sp3
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: n3, nidbas3, ndim3, ng3
INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
sp1 => spl%sp12%sp1
sp2 => spl%sp12%sp2
sp3 => spl%sp3
!
CALL get_dim(sp1, ndim1, n1, nidbas1)
CALL get_dim(sp2, ndim2, n2, nidbas2)
CALL get_dim(sp3, ndim3, n3, nidbas3)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(sp1, ng1)
CALL get_gauss(sp2, ng2)
CALL get_gauss(sp3, ng3)
WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
ALLOCATE(xg3(ng3), wg3(ng3))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs,1)
rhs(1:nrank,1:n3) = 0.0d0
!
DO i=1,n1
CALL get_gauss(sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), sp1, fun1, i)
DO j=1,n2
CALL get_gauss(sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), sp2, fun2, j)
DO k=1,n3
CALL get_gauss(sp3, ng3, k, xg3, wg3)
DO ig3=1,ng3
CALL basfun(xg3(ig3), sp3, fun3, k)
contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
& rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
DO k3=0,nidbas3
kk = MODULO(k+k3-1,n3) + 1
rhs(ij,kk) = rhs(ij, kk) + &
& contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(xg3, wg3, fun3)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
INTEGER, INTENT(in) :: m, n
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
END FUNCTION rhseq
END SUBROUTINE disrhs3
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(gbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: kl, ku, nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
kl = mat%kl
ku = mat%ku
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
DO i=ny,ny+kl
zsum(i) = zsum(i) + arr(i)
END DO
END DO
CALL putcol(mat, ny, zsum)
!
! The away operator
!
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs3(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, nz, k
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
nz = SIZE(rhs,2)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
DO k=1,nz
zsum = SUM(rhs(1:ny,k))
rhs(ny,k) = zsum
rhs(1:ny-1,k) = 0.0d0
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO k=1,nz
rhs(nrank-ny+1:nrank,k) = 0.0d0
END DO
END SUBROUTINE ibcrhs3
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde3d_mod
diff --git a/examples/poisson_mumps.f90 b/examples/poisson_mumps.f90
index 941a715..f495bd9 100644
--- a/examples/poisson_mumps.f90
+++ b/examples/poisson_mumps.f90
@@ -1,169 +1,169 @@
!>
!> @file poisson_mumps.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
USE mumps_bsplines
USE cds
!
IMPLICIT NONE
INCLUDE 'mpif.h'
TYPE(mumps_mat) :: amat
TYPE(cds_mat) :: amat_cds
DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
INTEGER :: nx=5, ny=4
INTEGER :: n
INTEGER :: i, j, irow
INTEGER :: ierr, me
INTEGER, ALLOCATABLE :: dists(:)
DOUBLE PRECISION :: t0
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
IF(me.EQ.0) THEN
WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
READ(*,*) nx, ny
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
n = nx*ny ! Rank of the matrix
ALLOCATE(rhs(n))
ALLOCATE(sol(n))
ALLOCATE(arow(n))
!
WRITE(*,'(/a)') 'Mumps using CSR mat ...'
CALL init(n, 1, amat)
!
! Construct the matrix and RHS
!
t0 = mpi_wtime(0)
DO j=1,ny
DO i=1,nx
arow = 0.0d0
irow = numb(i,j)
arow(irow) = 4.0d0
IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0
IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0
IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
CALL putrow(amat, irow, arow)
rhs(irow) = SUM(arow) ! => the exact solution is 1
END DO
END DO
!
WRITE(*,'(a,i6)') 'Rank of matrix', n
WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0
!
! Factor the amat matrix (Reordering, symbolic and numerical factorization)
!
t0 = mpi_wtime(0)
CALL factor(amat, nlmetis=.TRUE.)
sol=rhs
CALL bsolve(amat, sol)
WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0
!
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
END IF
CALL destroy(amat)
!
! CDS matrix
!
WRITE(*,'(/a)') 'Mumps using CDS mat ...'
IF(ALLOCATED(dists)) DEALLOCATE(dists)
ALLOCATE(dists(-2:2))
dists = [-nx, -1, 0, 1, nx]
WRITE(*,'(a/(20i4))') 'dists used in INIT =', dists
CALL init(n, dists, 1, amat_cds)
!
t0 = mpi_wtime(0)
DO j=1,ny
DO i=1,nx
arow = 0.0d0
irow = numb(i,j)
amat_cds%val(irow,0) = 4.0d0
IF(i.GT.1) amat_cds%val(irow,-1) = -1.0d0
IF(i.LT.nx) amat_cds%val(irow,+1) = -1.0d0
IF(j.GT.1) amat_cds%val(irow,-2) = -1.0d0
IF(j.LT.ny) amat_cds%val(irow,+2) = -1.0d0
END DO
END DO
WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0
!
! Compute dists of amat
PRINT*, 'stat of mata%mat', ASSOCIATED(amat%mat)
PRINT*, 'rank of mata', amat%mat%rank
CALL mstruct(amat%mat, dists)
WRITE(*,'(A/(20i4))') 'dists from MSTRUCT=', dists
!
t0 = mpi_wtime(0)
CALL cds2mumps(amat_cds, amat)
CALL factor(amat, debug=.FALSE.)
sol = rhs
CALL bsolve(amat, sol)
WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0
!
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
END IF
!
! Clean up
!
DEALLOCATE(rhs)
DEALLOCATE(sol)
DEALLOCATE(arow)
CALL destroy(amat)
CALL mpi_finalize(ierr)
CONTAINS
SUBROUTINE mstruct(mat, dists)
TYPE(spmat), INTENT(in) :: mat
INTEGER, ALLOCATABLE, INTENT(inout) :: dists(:)
TYPE(elt), POINTER :: t
INTEGER :: n, i, j0
j0 = LBOUND(dists,1)
n = mat%rank
PRINT*, 'rank of mat', n
DO i=1,n ! scan the matrix rows
t => mat%row(i)%row0
DO WHILE(ASSOCIATED(t)) ! walk thru the linked list row(i)
j = t%index
IF(ABS(t%val) .LE. EPSILON(0.0d0)) THEN
dists(j0) = t%index-i ! distance from main diag
j0 = j0+1
END IF
t => t%next
END DO
END DO
END SUBROUTINE mstruct
INTEGER FUNCTION numb(i,j)
!
! One-dimensional numbering
! Number first x then y
!
INTEGER, INTENT(in) :: i, j
numb = (j-1)*nx + i
END FUNCTION numb
END PROGRAM main
diff --git a/examples/poisson_petsc.f90 b/examples/poisson_petsc.f90
index 341afba..87dde81 100644
--- a/examples/poisson_petsc.f90
+++ b/examples/poisson_petsc.f90
@@ -1,218 +1,218 @@
!>
!> @file poisson_petsc.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
USE petsc_bsplines
IMPLICIT NONE
TYPE(petsc_mat) :: amat
DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
INTEGER :: nx=5, ny=4, ntrials=10
INTEGER :: nitmax=10000, nits
DOUBLE PRECISION :: rtol=1.e-9
INTEGER :: n, nnz, nnz_loc
INTEGER :: i, j, irow, jcol
INTEGER :: ierr, me, npes, istart, iend
DOUBLE PRECISION :: t0
INTEGER :: ncols, cols(5) ! Max nnz by row .LE. 5
!
CHARACTER(len=128) :: matfile='mat.dat', rhsfile='rhs.dat'
LOGICAL :: file_exist
!
NAMELIST /newrun/ nx, ny, nitmax, rtol, matfile, rhsfile
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
!
IF(me.EQ.0) THEN
READ(*, newrun)
WRITE(*, newrun)
WRITE(*,'(a,i6)') 'npes =', npes
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(rhsfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
!
n = nx*ny ! Rank of the matrix
!
! Initialize matrix
!
CALL init(n, 1, amat, comm=MPI_COMM_WORLD)
istart = amat%istart
iend = amat%iend
!!$ WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend', istart, iend
!
!
INQUIRE(file=TRIM(matfile), exist=file_exist)
!
IF( file_exist ) THEN
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
t0 = mpi_wtime()
CALL load_mat(amat, matfile)
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. loading time (s)', mpi_wtime()-t0
ELSE
!
! Construct the matrix
!
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
t0 = mpi_wtime()
ALLOCATE(arow(5)) ! Max nnz per row .LE. 5
DO j=1,ny
DO i=1,nx
irow = numb(i,j)
IF( irow.GE.istart .AND. irow.LE.iend) THEN
ncols = 1; cols(ncols) = irow; arow(ncols) = 4.0d0
IF(i.GT.1) THEN
ncols = ncols+1
cols(ncols) = numb(i-1,j); arow(ncols) = -1.0d0
END IF
IF(i.LT.nx) THEN
ncols = ncols+1
cols(ncols) = numb(i+1,j); arow(ncols) = -1.0d0
END IF
IF(j.GT.1) THEN
ncols = ncols+1
cols(ncols) = numb(i,j-1); arow(ncols) = -1.0d0
END IF
IF(j.LT.ny) THEN
ncols = ncols+1
cols(ncols) = numb(i,j+1); arow(ncols) = -1.0d0
END IF
CALL putrow(amat, irow, arow(1:ncols), cols(1:ncols))
END IF
END DO
END DO
DEALLOCATE(arow)
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. construction time (s)', mpi_wtime()-t0
!
! Convert to PETSC mat
!
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
t0=mpi_wtime()
CALL to_mat(amat)
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. conversion time (s)', mpi_wtime()-t0
!
CALL save_mat(amat, matfile)
END IF
!
! Matrix size and partition could have changed after loading from file!
!
n = amat%rank
istart = amat%istart
iend = amat%iend
!
nnz_loc = get_count(amat)
CALL mpi_reduce(nnz_loc, nnz, 1, MPI_INTEGER, mpi_sum, 0, MPI_COMM_WORLD, ierr)
IF(npes.LE.4) THEN
WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend (after), nloc, nnz_loc', &
& istart, iend, iend-istart+1, nnz_loc
END IF
!
! Construct or read RHS
!
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
t0 = mpi_wtime()
ALLOCATE(rhs(n))
INQUIRE(file=TRIM(rhsfile), exist=file_exist)
IF( file_exist ) THEN
OPEN(unit=99, file=TRIM(rhsfile), status='old', form='unformatted')
READ(99) rhs(1:n)
CLOSE(99)
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'RHS read time (s)', mpi_wtime()-t0
END IF
ELSE
rhs = 0.0d0
ALLOCATE(arow(n))
DO i=istart, iend
arow = 0.0d0
CALL getrow(amat, i, arow)
rhs(i) = SUM(arow) ! => the exact solution is 1
END DO
arow = rhs
CALL mpi_allreduce(arow, rhs, n, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr)
DEALLOCATE(arow)
IF( me.EQ.0 ) THEN ! All processes have the gobla rhs
OPEN(unit=99, file=TRIM(rhsfile), status='new', form='unformatted')
WRITE(99) rhs(1:n)
CLOSE(99)
END IF
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'RHS construction time (s)', mpi_wtime()-t0
END IF
END IF
CLOSE(99)
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
!
! Back solve
!
ALLOCATE(sol(n))
!
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
sol = 0.0d0
t0=mpi_wtime()
CALL bsolve(amat, rhs, sol, rtol, nitmax, nits)
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', &
& MAXVAL(ABS(sol-1.0d0)), nits, mpi_wtime()-t0
END IF
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
t0=mpi_wtime()
DO i=1,ntrials
sol = 0.0d0
CALL bsolve(amat, rhs, sol, rtol, nitmax, nits)
END DO
CALL mpi_barrier(MPI_COMM_WORLD,ierr)
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', &
& MAXVAL(ABS(sol-1.0d0)), nits, (mpi_wtime()-t0)/REAL(ntrials)
END IF
!
! Clean up
!
DEALLOCATE(rhs)
DEALLOCATE(sol)
CALL destroy(amat)
CALL PetscFinalize(ierr)
CALL mpi_finalize(ierr)
CONTAINS
INTEGER FUNCTION numb(i,j)
!
! One-dimensional numbering
! Number first x then y
!
INTEGER, INTENT(in) :: i, j
numb = (j-1)*nx + i
END FUNCTION numb
END PROGRAM main
diff --git a/examples/ppde3d.f90 b/examples/ppde3d.f90
index 00e4f06..6a895e5 100644
--- a/examples/ppde3d.f90
+++ b/examples/ppde3d.f90
@@ -1,510 +1,510 @@
!>
!> @file ppde3d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 3d PDE using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
!
USE futils
USE fft
USE pputils2, ONLY : pptransp
USE ppde3d_mod
!
IMPLICIT NONE
!
CHARACTER(len=128) :: infile="ppde3d.in"
INTEGER :: l
INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
INTEGER :: startz, endz, nzloc
INTEGER :: start_rank, end_rank, nrank_loc
LOGICAL :: nlppform
INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
INTEGER :: jder(3), it
DOUBLE PRECISION :: pi, coefx(5)
DOUBLE PRECISION :: dy, dz
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t
DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t
!
TYPE(spline2d1d), TARGET :: splxyz
TYPE(spline2d), POINTER :: splxy
TYPE(spline1d) :: splz
TYPE(gbmat) :: mat
!
CHARACTER(len=128) :: file='ppde3d.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
INTEGER :: nits=500
!
INTEGER, PARAMETER :: npart=10000000
INTEGER :: nploc
DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
DOUBLE PRECISION zsuml, zsumg, errnorm2
!
NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
!===========================================================================
! 1.0 Prologue
!
! Init MPI
!
CALL mpi_init(ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Get input file name from command argument
!
IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN
CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr)
END IF
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nz = 8 ! Number of intervals in z
nidbas = (/3,3,3/) ! Degree of splines
ngauss = (/4,4, 4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
npow = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
OPEN(unit=99, file=TRIM(infile), status='old', action='read')
READ(99,newrun)
IF( me.EQ.0) THEN
WRITE(*,newrun)
END IF
CLOSE(99)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
!
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
!
dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y
ygrid = (/ (j*dy, j=0,ny) /)
!
! Partitionned toroidal grid z
!
dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z
zgrid = (/ (k*dz, k=0,nz) /)
CALL dist1d(0, nz, startz, nzloc)
endz = startz+nzloc
!!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc
!
IF( me.EQ.0) THEN
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
END IF
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', &
& mpicomm=MPI_COMM_WORLD)
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NZ', nz)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
CALL attach(fid, '/', 'MBESS', mbess)
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi')
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), &
& splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
splxy => splxyz%sp12
!
IF( me.EQ.0) THEN
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
END IF
CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD)
!
! 2D FE matrix assembly (in plane x-y)
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
IF(me.EQ.0) THEN
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
END IF
!
CALL init(kl, ku, nrank, nterms, mat)
CALL dismat(splxy, mat)
ALLOCATE(arr(nrank))
!
! BC on Matrix
!
CALL ibcmat(mat, ny)
tmat = seconds() - t0
!
! 3D RHS assembly
!
ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1
ALLOCATE(sol(nrank,0:nzloc-1))
CALL disrhs3(mbess, npow, splxyz, rhs)
!
zsuml = SUM(ABS(rhs(:,0:nzloc-1)))
CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
& MPI_COMM_WORLD, ierr)
IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg
!
! FFT in z of RHS
!
CALL dist1d(1, nrank, start_rank, nrank_loc)
end_rank = start_rank+nrank_loc-1
ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc))
!
CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t)
crhs_t = rhs_t
CALL fourcol(crhs_t,1)
crhs_t = crhs_t/REAL(nz,8)
!
! Apply Mass matrix to crhs
!
PRINT*, 4
CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.)
ALLOCATE(fftmass(0:nz-1))
PRINT*, 5
!!$ CALL calc_fftmass(splz, fftmass)
CALL calc_fftmass_old(splz, fftmass)
IF(me.EQ.0) THEN
WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass
END IF
DO k=0,nz-1
crhs_t(k,:) = crhs_t(k,:)/fftmass(k)
END DO
!
! Fourier transform back crhs to real space in z
!
CALL fourcol(crhs_t, -1)
rhs_t(:,:) = REAL(crhs_t(:,:),8)
CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL
!
! BC on RHS
!
CALL ibcrhs3(sol, ny)
!
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
END IF
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, sol)
!
! Backtransform of solution
!
DO k=0,nzloc-1
sol(1:ny-1,k) = sol(ny,k)
END DO
!
zsuml = SUM(ABS(sol))
CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
& MPI_COMM_WORLD, ierr)
IF(me.EQ.0) PRINT*, 'sum of sol', zsumg
!
tsolv = seconds() - t0
gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
!
! Spline coefficients, taking into account of periodicity in y and z
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
dimz = splxyz%sp3%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
!
! Get 3D array of spline coefs.
!
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
DO k=0,nzloc-1
bcoef(i,j,k) = sol(ij,k)
END DO
END DO
END DO
!
! Get missing coefs from remote guard cells
!
prev = MODULO(me-1,npes)
next = MODULO(me+1,npes)
count = dimx*dimy
DO i=0,nidbas(3)-1
CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, &
& bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, &
& MPI_COMM_WORLD, status, ierr)
END DO
!
IF(me.EQ.0) THEN
WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
END IF
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
CALL RANDOM_NUMBER(xp)
CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp
CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp
nploc = 0
DO i=1,npart
IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN
nploc = nploc+1
xp(nploc) = xp(i)
yp(nploc) = yp(i)
zp(nploc) = zp(i)
END IF
END DO
jder = (/0,0,0/)
CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef)
DO i=1,nploc
fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
& * COS(mbess*yp(i)) * COS(zp(i))**npow
END DO
errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc))
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', &
& errnorm2
END IF
!
ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1))
ALLOCATE(solana(0:nx,0:ny,0:nzloc-1))
ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1))
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
kk=startz+k
solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
& * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow
END DO
END DO
END DO
!
jder = (/0,0,0/)
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
tgrid = seconds()-t0
errsol = solana - solcal
!
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& errnorm2
END IF
CALL putarr(fid, '/sol', solcal,pardim=3)
CALL putarr(fid, '/solana', solana,pardim=3)
!
! Check derivative d/dx
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
IF( mbess .EQ. 0 ) THEN
solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow
ELSE
solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
& COS(zgrid(k+startz))**npow
END IF
END DO
END DO
END DO
!
jder = (/1,0,0/)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2
END IF
CALL putarr(fid, '/derivx', solcal, pardim=3)
CALL putarr(fid, '/derivx_exact', solana,pardim=3)
!
! Check derivative d/dy
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
& SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow
END DO
END DO
END DO
!
jder = (/0,1,0/)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2
END IF
CALL putarr(fid, '/derivy', solcal, pardim=3)
CALL putarr(fid, '/derivy_exact', solana,pardim=3)
!
! Check derivative d/dz
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
& * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) &
& * SIN(zgrid(k+startz))
END DO
END DO
END DO
!
jder = (/0,0,1/)
t0 = seconds()
IF(nlppform) THEN
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
ELSE
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
END IF
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2
END IF
CALL putarr(fid, '/derivz', solcal, pardim=3)
CALL putarr(fid, '/derivz_exact', solana,pardim=3)
!===========================================================================
! 9.0 Epilogue
!
IF(me.EQ.0) THEN
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
END IF
!
DEALLOCATE(xgrid, ygrid, zgrid)
DEALLOCATE(fftmass)
DEALLOCATE(rhs)
DEALLOCATE(sol)
DEALLOCATE(rhs_t)
DEALLOCATE(crhs_t)
DEALLOCATE(bcoef)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(arr)
CALL destroy_sp(splxyz)
CALL destroy_sp(splz)
CALL destroy(mat)
!
CALL closef(fid)
!
CALL mpi_finalize(ierr)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
DOUBLE PRECISION :: sum2, sum2g
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
DO k=1,SIZE(x,3)
sum2 = sum2 + x(i,j,k)**2
END DO
END DO
END DO
CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
& MPI_COMM_WORLD, ierr)
norm2 = SQRT(sum2g)
END FUNCTION norm2
!
FUNCTION norm21(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm21
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2, sum2g
INTEGER :: i, j
!
sum2 = DOT_PRODUCT(x,x)
CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
& MPI_COMM_WORLD, ierr)
norm21 = SQRT(sum2g)
END FUNCTION norm21
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
diff --git a/examples/ppde3d_mod.f90 b/examples/ppde3d_mod.f90
index c04c788..f4f7627 100644
--- a/examples/ppde3d_mod.f90
+++ b/examples/ppde3d_mod.f90
@@ -1,473 +1,473 @@
!>
!> @file ppde3d_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE ppde3d_mod
USE bsplines
USE matrix
IMPLICIT NONE
INCLUDE "mpif.h"
!
INTEGER :: me, npes
INTEGER :: prev, next
INTEGER :: count, status(MPI_STATUS_SIZE), ierr
!
CONTAINS
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(gbmat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
!
ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1)) !
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
DO iterm=1,kterms
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
contrib = fun1(iw1,iderw(iterm,1)) * &
& fun2(iw2,iderw(iterm,2)) * &
& coefs(iterm) * &
& fun2(it2,idert(iterm,2)) * &
& fun1(it1,idert(iterm,1)) * &
& wg1(ig1) * wg2(ig2)
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs3(mbess, npow, spl, rhs)
!
! Assembly the RHS using 3d spline spl
!
INTEGER, INTENT(in) :: mbess, npow
TYPE(spline2d1d), TARGET :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
!
TYPE(spline1d), POINTER :: sp1, sp2, sp3
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: n3, nidbas3, ndim3, ng3
INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
DOUBLE PRECISION, ALLOCATABLE :: buf(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
sp1 => spl%sp12%sp1
sp2 => spl%sp12%sp2
sp3 => spl%sp3
!
CALL get_dim(sp1, ndim1, n1, nidbas1)
CALL get_dim(sp2, ndim2, n2, nidbas2)
CALL get_dim(sp3, ndim3, n3, nidbas3)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(sp1, ng1)
CALL get_gauss(sp2, ng2)
CALL get_gauss(sp3, ng3)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
ALLOCATE(xg3(ng3), wg3(ng3))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs,1)
rhs = 0.0d0
!
DO i=1,n1
CALL get_gauss(sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), sp1, fun1, i)
DO j=1,n2
CALL get_gauss(sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), sp2, fun2, j)
DO k=1,n3
CALL get_gauss(sp3, ng3, k, xg3, wg3)
DO ig3=1,ng3
CALL basfun(xg3(ig3), sp3, fun3, k)
contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
& rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
DO k3=0,nidbas3
kk = k+k3
rhs(ij,kk) = rhs(ij, kk) + &
& contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!
! Update from remote guard cells
!
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
next = MODULO(me+1,npes)
prev = MODULO(me-1,npes)
count = nrank
ALLOCATE(buf(nrank))
DO i=nidbas3,1,-1
CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, &
& buf, count, MPI_DOUBLE_PRECISION, prev, 0, &
& MPI_COMM_WORLD, status, ierr)
rhs(:,i) = rhs(:,i) + buf(:)
END DO
DEALLOCATE(buf)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(xg3, wg3, fun3)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
INTEGER, INTENT(in) :: m, n
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
END FUNCTION rhseq
END SUBROUTINE disrhs3
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(gbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: kl, ku, nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
kl = mat%kl
ku = mat%ku
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
CALL putrow(mat, ny, zsum)
!
! The horizontal sum on the NY-th column
!
zsum = 0.0d0
DO j=1,ny
arr = 0.0d0
CALL getcol(mat, j, arr)
DO i=ny,ny+kl
zsum(i) = zsum(i) + arr(i)
END DO
END DO
CALL putcol(mat, ny, zsum)
!
! The away operator
!
DO j = 1,ny-1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs3(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, nz, k
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
nz = SIZE(rhs,2)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
DO k=1,nz
zsum = SUM(rhs(1:ny,k))
rhs(ny,k) = zsum
rhs(1:ny-1,k) = 0.0d0
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO k=1,nz
rhs(nrank-ny+1:nrank,k) = 0.0d0
END DO
END SUBROUTINE ibcrhs3
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dist1d(s0, ntot, s, nloc)
INCLUDE 'mpif.h'
INTEGER, INTENT(in) :: s0, ntot
INTEGER, INTENT(out) :: s, nloc
INTEGER :: me, npes, ierr, naver, rem
!
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
naver = ntot/npes
rem = MODULO(ntot,npes)
s = s0 + MIN(rem,me) + me*naver
nloc = naver
IF( me.LT.rem ) nloc = nloc+1
END SUBROUTINE dist1d
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disp(a, str, comm)
!
! Gather partitionned 1d array to 0 and print it
!
INCLUDE 'mpif.h'
DOUBLE PRECISION, INTENT(in) :: a(:)
INTEGER, INTENT(in) :: comm
CHARACTER(len=*), INTENT(in) :: str
INTEGER :: n, ntot, npes, me, ierr, i
DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c
INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs
!
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_comm_size(comm, npes, ierr)
n = SIZE(a)
IF(me.EQ.0) THEN
ALLOCATE(counts(npes), displs(npes+1))
END IF
CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr)
IF(me.EQ.0) THEN
displs(1) = 0
DO i=2,npes+1
displs(i) = displs(i-1)+counts(i-1)
END DO
ntot = displs(npes+1)
ALLOCATE(c(ntot))
c = 0.0d0
END IF
CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, &
& MPI_DOUBLE_PRECISION, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(/a)') TRIM(str)
DO i=1,npes
WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1, &
& c(displs(i)+1:displs(i+1))
END DO
DEALLOCATE(c)
DEALLOCATE(counts)
DEALLOCATE(displs)
END IF
END SUBROUTINE disp
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE ppde3d_mod
diff --git a/examples/ppde3d_pb.f90 b/examples/ppde3d_pb.f90
index 850056b..800226f 100644
--- a/examples/ppde3d_pb.f90
+++ b/examples/ppde3d_pb.f90
@@ -1,518 +1,518 @@
!>
!> @file ppde3d_pb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 3d PDE using splines:
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
!
USE futils
USE fft
USE pputils2, ONLY : pptransp
USE ppde3d_pb_mod
!
IMPLICIT NONE
!
CHARACTER(len=128) :: infile="ppde3d_pb.in"
INTEGER :: l
INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
INTEGER :: startz, endz, nzloc
INTEGER :: start_rank, end_rank, nrank_loc
LOGICAL :: nlppform
INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
INTEGER :: jder(3), it
DOUBLE PRECISION :: pi, coefx(5)
DOUBLE PRECISION :: dy, dz
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t
DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t
!
TYPE(spline2d1d), TARGET :: splxyz
TYPE(spline2d), POINTER :: splxy
TYPE(spline1d) :: splz
TYPE(pbmat) :: mat
!
CHARACTER(len=128) :: file='ppde3d_pb.h5'
INTEGER :: fid
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
DOUBLE PRECISION :: seconds, mem, dopla
DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
INTEGER :: nits=500
!
INTEGER, PARAMETER :: npart=10000000
INTEGER :: nploc
DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
DOUBLE PRECISION zsuml, zsumg, errnorm2
!
INTEGER :: kmin, kmax
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted
!
NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
!===========================================================================
! 1.0 Prologue
!
! Init MPI
!
CALL mpi_init(ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Get input file name from command argument
!
IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN
CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr)
END IF
!
! Read in data specific to run
!
nx = 8 ! Number of intervals in x
ny = 8 ! Number of intervals in y
nz = 8 ! Number of intervals in z
nidbas = (/3,3,3/) ! Degree of splines
ngauss = (/4,4, 4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
npow = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nlppform = .TRUE. ! Use PPFORM for gridval or not
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
OPEN(unit=99, file=TRIM(infile), status='old', action='read')
READ(99,newrun)
IF( me.EQ.0) THEN
WRITE(*,newrun)
END IF
CLOSE(99)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
!
xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0
CALL meshdist(coefx, xgrid, nx)
!
dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y
ygrid = (/ (j*dy, j=0,ny) /)
!
! Partitionned toroidal grid z
!
dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z
zgrid = (/ (k*dz, k=0,nz) /)
CALL dist1d(0, nz, startz, nzloc)
endz = startz+nzloc
!!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc
!
IF( me.EQ.0) THEN
WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
END IF
!
! Create hdf5 file
!
CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', &
& mpicomm=MPI_COMM_WORLD)
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'NZ', nz)
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
CALL attach(fid, '/', 'MBESS', mbess)
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi')
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
t0 = seconds()
CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), &
& splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
splxy => splxyz%sp12
!
IF( me.EQ.0) THEN
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
END IF
CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD)
!
! 2D FE matrix assembly (in plane x-y)
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
IF(me.EQ.0) THEN
WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
END IF
!
CALL init(ku, nrank, nterms, mat)
CALL dismat(splxy, mat)
ALLOCATE(arr(nrank))
!
! BC on Matrix
!
CALL ibcmat(mat, ny)
tmat = seconds() - t0
!
! 3D RHS assembly
!
ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1
ALLOCATE(sol(nrank,0:nzloc-1))
CALL disrhs3(mbess, npow, splxyz, rhs)
!
zsuml = SUM(ABS(rhs(:,0:nzloc-1)))
CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
& MPI_COMM_WORLD, ierr)
IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg
!
! FFT in z of RHS
!
CALL dist1d(1, nrank, start_rank, nrank_loc)
end_rank = start_rank+nrank_loc-1
ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc))
!
CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t)
crhs_t = rhs_t
CALL fourcol(crhs_t,1)
crhs_t = crhs_t/REAL(nz,8)
!
! Apply Mass matrix to crhs
!
CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.)
kmin =-nz/2
kmax = nz/2-1
CALL init_dft(splz, kmin, kmax)
ALLOCATE(fftmass_shifted(kmin:kmax))
ALLOCATE(fftmass(0:nz-1))
CALL calc_fftmass_old(splz, fftmass_shifted)
DO k=kmin,kmax
fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k)
END DO
IF(me.EQ.0) THEN
WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass
END IF
DO k=0,nz-1
crhs_t(k,:) = crhs_t(k,:)/fftmass(k)
END DO
!
! Fourier transform back crhs to real space in z
!
CALL fourcol(crhs_t, -1)
rhs_t(:,:) = REAL(crhs_t(:,:),8)
CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL
!
! BC on RHS
!
CALL ibcrhs3(sol, ny)
!
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
END IF
!===========================================================================
! 3.0 Solve the dicretized system
!
t0 = seconds()
CALL factor(mat)
tfact = seconds() - t0
gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
t0 = seconds()
CALL bsolve(mat, sol)
!
! Backtransform of solution
!
DO k=0,nzloc-1
sol(1:ny-1,k) = sol(ny,k)
END DO
!
zsuml = SUM(ABS(sol))
CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
& MPI_COMM_WORLD, ierr)
IF(me.EQ.0) PRINT*, 'sum of sol', zsumg
!
tsolv = seconds() - t0
gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
!
! Spline coefficients, taking into account of periodicity in y and z
! Note: in SOL, y was numbered first.
!
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
dimz = splxyz%sp3%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
!
! Get 3D array of spline coefs.
!
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,ny) + i*ny + 1
DO k=0,nzloc-1
bcoef(i,j,k) = sol(ij,k)
END DO
END DO
END DO
!
! Get missing coefs from remote guard cells
!
prev = MODULO(me-1,npes)
next = MODULO(me+1,npes)
count = dimx*dimy
DO i=0,nidbas(3)-1
CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, &
& bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, &
& MPI_COMM_WORLD, status, ierr)
END DO
!
IF(me.EQ.0) THEN
WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
END IF
!===========================================================================
! 4.0 Check the solution
!
! Check function values computed with various method
!
CALL RANDOM_NUMBER(xp)
CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp
CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp
nploc = 0
DO i=1,npart
IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN
nploc = nploc+1
xp(nploc) = xp(i)
yp(nploc) = yp(i)
zp(nploc) = zp(i)
END IF
END DO
jder = (/0,0,0/)
CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef)
DO i=1,nploc
fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
& * COS(mbess*yp(i)) * COS(zp(i))**npow
END DO
errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc))
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', &
& errnorm2
END IF
!
ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1))
ALLOCATE(solana(0:nx,0:ny,0:nzloc-1))
ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1))
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
kk=startz+k
solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
& * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow
END DO
END DO
END DO
!
jder = (/0,0,0/)
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
tgrid = seconds()-t0
errsol = solana - solcal
!
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& errnorm2
END IF
CALL putarr(fid, '/sol', solcal,pardim=3)
CALL putarr(fid, '/solana', solana,pardim=3)
!
! Check derivative d/dx
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
IF( mbess .EQ. 0 ) THEN
solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow
ELSE
solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
& xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
& COS(zgrid(k+startz))**npow
END IF
END DO
END DO
END DO
!
jder = (/1,0,0/)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2
END IF
CALL putarr(fid, '/derivx', solcal, pardim=3)
CALL putarr(fid, '/derivx_exact', solana,pardim=3)
!
! Check derivative d/dy
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
& SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow
END DO
END DO
END DO
!
jder = (/0,1,0/)
t0 = seconds()
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2
END IF
CALL putarr(fid, '/derivy', solcal, pardim=3)
CALL putarr(fid, '/derivy_exact', solana,pardim=3)
!
! Check derivative d/dz
!
DO i=0,nx
DO j=0,ny
DO k=0,nzloc-1
solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
& * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) &
& * SIN(zgrid(k+startz))
END DO
END DO
END DO
!
jder = (/0,0,1/)
t0 = seconds()
IF(nlppform) THEN
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
ELSE
CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
END IF
tgrid = tgrid + seconds()-t0
errsol = solana - solcal
errnorm2 = norm2(errsol) / norm2(solana)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2
END IF
CALL putarr(fid, '/derivz', solcal, pardim=3)
CALL putarr(fid, '/derivz_exact', solana,pardim=3)
!===========================================================================
! 9.0 Epilogue
!
IF(me.EQ.0) THEN
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv
WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid
WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
END IF
!
DEALLOCATE(xgrid, ygrid, zgrid)
DEALLOCATE(fftmass)
DEALLOCATE(fftmass_shifted)
DEALLOCATE(rhs)
DEALLOCATE(sol)
DEALLOCATE(rhs_t)
DEALLOCATE(crhs_t)
DEALLOCATE(bcoef)
DEALLOCATE(solcal, solana, errsol)
DEALLOCATE(arr)
CALL destroy_sp(splxyz)
CALL destroy_sp(splz)
CALL destroy(mat)
!
CALL closef(fid)
!
CALL mpi_finalize(ierr)
!===========================================================================
!
CONTAINS
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
DOUBLE PRECISION :: sum2, sum2g
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
DO k=1,SIZE(x,3)
sum2 = sum2 + x(i,j,k)**2
END DO
END DO
END DO
CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
& MPI_COMM_WORLD, ierr)
norm2 = SQRT(sum2g)
END FUNCTION norm2
!
FUNCTION norm21(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm21
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sum2, sum2g
INTEGER :: i, j
!
sum2 = DOT_PRODUCT(x,x)
CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
& MPI_COMM_WORLD, ierr)
norm21 = SQRT(sum2g)
END FUNCTION norm21
SUBROUTINE prnmat(label, mat)
CHARACTER(len=*) :: label
DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
INTEGER :: i
WRITE(*,'(/a)') TRIM(label)
DO i=1,SIZE(mat,1)
WRITE(*,'(10(1pe12.3))') mat(i,:)
END DO
END SUBROUTINE prnmat
END PROGRAM main
!
!+++
diff --git a/examples/ppde3d_pb_mod.f90 b/examples/ppde3d_pb_mod.f90
index 72c4917..6995b7a 100644
--- a/examples/ppde3d_pb_mod.f90
+++ b/examples/ppde3d_pb_mod.f90
@@ -1,452 +1,452 @@
!>
!> @file ppde3d_pb_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE ppde3d_pb_mod
USE bsplines
USE matrix
IMPLICIT NONE
INCLUDE "mpif.h"
!
INTEGER :: me, npes
INTEGER :: prev, next
INTEGER :: count, status(MPI_STATUS_SIZE), ierr
!
CONTAINS
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(pbmat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
!
ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1)) !
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
DO iterm=1,kterms
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
contrib = fun1(iw1,iderw(iterm,1)) * &
& fun2(iw2,iderw(iterm,2)) * &
& coefs(iterm) * &
& fun2(it2,idert(iterm,2)) * &
& fun1(it1,idert(iterm,1)) * &
& wg1(ig1) * wg2(ig2)
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disrhs3(mbess, npow, spl, rhs)
!
! Assembly the RHS using 3d spline spl
!
INTEGER, INTENT(in) :: mbess, npow
TYPE(spline2d1d), TARGET :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
!
TYPE(spline1d), POINTER :: sp1, sp2, sp3
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: n3, nidbas3, ndim3, ng3
INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
DOUBLE PRECISION, ALLOCATABLE :: buf(:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
sp1 => spl%sp12%sp1
sp2 => spl%sp12%sp2
sp3 => spl%sp3
!
CALL get_dim(sp1, ndim1, n1, nidbas1)
CALL get_dim(sp2, ndim2, n2, nidbas2)
CALL get_dim(sp3, ndim3, n3, nidbas3)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(sp1, ng1)
CALL get_gauss(sp2, ng2)
CALL get_gauss(sp3, ng3)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng2), wg2(ng2))
ALLOCATE(xg3(ng3), wg3(ng3))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs,1)
rhs = 0.0d0
!
DO i=1,n1
CALL get_gauss(sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), sp1, fun1, i)
DO j=1,n2
CALL get_gauss(sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), sp2, fun2, j)
DO k=1,n3
CALL get_gauss(sp3, ng3, k, xg3, wg3)
DO ig3=1,ng3
CALL basfun(xg3(ig3), sp3, fun3, k)
contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
& rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
DO k3=0,nidbas3
kk = k+k3
rhs(ij,kk) = rhs(ij, kk) + &
& contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!
! Update from remote guard cells
!
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
next = MODULO(me+1,npes)
prev = MODULO(me-1,npes)
count = nrank
ALLOCATE(buf(nrank))
DO i=nidbas3,1,-1
CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, &
& buf, count, MPI_DOUBLE_PRECISION, prev, 0, &
& MPI_COMM_WORLD, status, ierr)
rhs(:,i) = rhs(:,i) + buf(:)
END DO
DEALLOCATE(buf)
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(xg3, wg3, fun3)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
INTEGER, INTENT(in) :: m, n
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
END FUNCTION rhseq
END SUBROUTINE disrhs3
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE meshdist(c, x, nx)
!
! Construct an 1d non-equidistant mesh given a
! mesh distribution function.
!
DOUBLE PRECISION, INTENT(in) :: c(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
INTEGER :: nintg
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
DOUBLE PRECISION :: a, b, dx, f0, f1, scal
INTEGER :: i, k
!
a=x(0)
b=x(nx)
nintg = 10*nx
ALLOCATE(xint(0:nintg), fint(0:nintg))
!
! Mesh distribution
!
dx = (b-a)/REAL(nintg)
xint(0) = a
fint(0) = 0.0d0
f1 = fdist(xint(0))
DO i=1,nintg
f0 = f1
xint(i) = xint(i-1) + dx
f1 = fdist(xint(i))
fint(i) = fint(i-1) + 0.5*(f0+f1)
END DO
!
! Normalization
!
scal = REAL(nx) / fint(nintg)
fint(0:nintg) = fint(0:nintg) * scal
!
! Obtain mesh point by (inverse) interpolation
!
k = 1
DO i=1,nintg-1
IF( fint(i) .GE. REAL(k) ) THEN
x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
& (k-fint(i))
k = k+1
END IF
END DO
!
DEALLOCATE(xint, fint)
CONTAINS
DOUBLE PRECISION FUNCTION fdist(x)
DOUBLE PRECISION, INTENT(in) :: x
fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
END FUNCTION fdist
END SUBROUTINE meshdist
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny)
!
! Apply BC on matrix
!
TYPE(pbmat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny
INTEGER :: kl, ku, nrank, i, j
DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
! 1.0 Prologue
!
ku = mat%ku
kl = ku
nrank = mat%rank
ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
!
zsum = 0.0d0
DO i=1,ny
arr = 0.0d0
CALL getrow(mat, i, arr)
DO j=1,ny+ku
zsum(j) = zsum(j) + arr(j)
END DO
END DO
!
zsum(ny) = SUM(zsum(1:ny)) ! using symmetry
CALL putrow(mat, ny, zsum)
!
! The away operator
!
DO i = 1,ny-1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO i = nrank, nrank-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(zsum, arr)
!
END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs3(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
INTEGER, INTENT(in) :: ny
INTEGER :: nrank, nz, k
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
nz = SIZE(rhs,2)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
DO k=1,nz
zsum = SUM(rhs(1:ny,k))
rhs(ny,k) = zsum
rhs(1:ny-1,k) = 0.0d0
END DO
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO k=1,nz
rhs(nrank-ny+1:nrank,k) = 0.0d0
END DO
END SUBROUTINE ibcrhs3
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dist1d(s0, ntot, s, nloc)
INCLUDE 'mpif.h'
INTEGER, INTENT(in) :: s0, ntot
INTEGER, INTENT(out) :: s, nloc
INTEGER :: me, npes, ierr, naver, rem
!
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
naver = ntot/npes
rem = MODULO(ntot,npes)
s = s0 + MIN(rem,me) + me*naver
nloc = naver
IF( me.LT.rem ) nloc = nloc+1
END SUBROUTINE dist1d
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE disp(a, str, comm)
!
! Gather partitionned 1d array to 0 and print it
!
INCLUDE 'mpif.h'
DOUBLE PRECISION, INTENT(in) :: a(:)
INTEGER, INTENT(in) :: comm
CHARACTER(len=*), INTENT(in) :: str
INTEGER :: n, ntot, npes, me, ierr, i
DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c
INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs
!
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_comm_size(comm, npes, ierr)
n = SIZE(a)
IF(me.EQ.0) THEN
ALLOCATE(counts(npes), displs(npes+1))
END IF
CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr)
IF(me.EQ.0) THEN
displs(1) = 0
DO i=2,npes+1
displs(i) = displs(i-1)+counts(i-1)
END DO
ntot = displs(npes+1)
ALLOCATE(c(ntot))
c = 0.0d0
END IF
CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, &
& MPI_DOUBLE_PRECISION, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(/a)') TRIM(str)
DO i=1,npes
WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1, &
& c(displs(i)+1:displs(i+1))
END DO
DEALLOCATE(c)
DEALLOCATE(counts)
DEALLOCATE(displs)
END IF
END SUBROUTINE disp
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE ppde3d_pb_mod
diff --git a/examples/tbasfun.f90 b/examples/tbasfun.f90
index 26bf500..cec95a8 100644
--- a/examples/tbasfun.f90
+++ b/examples/tbasfun.f90
@@ -1,137 +1,137 @@
!>
!> @file tbasfun.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test scalar and vector versions of def_basfun
!
USE bsplines
IMPLICIT NONE
INTEGER :: nx, nidbas, nrank, npt, jdermx
DOUBLE PRECISION :: dx
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :), vfun(:,:,:)
DOUBLE PRECISION, ALLOCATABLE :: fun1(:, :), vfun1(:,:,:)
DOUBLE PRECISION :: errfun
INTEGER :: left, i, nerrs, k
INTEGER, ALLOCATABLE :: vleft(:)
LOGICAL :: nlper=.FALSE.
TYPE(spline1d) :: splx
!
NAMELIST /newrun/ nx, nidbas, npt, jdermx, nlper
!
!===============================================================================
!
! 1D grid
!
nx = 10
nidbas = 3
npt = 1000000
jdermx = 0
READ(*,newrun)
WRITE(*,newrun)
ALLOCATE(xgrid(0:nx))
dx = 1.0d0/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
! Set up spline
!
CALL set_spline(nidbas, 4, xgrid, splx, period=nlper)
nrank = splx%dim
WRITE(*,'(a, i5)') 'nrank =', nrank
WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
!
IF(nx.LE.2) THEN
WRITE(*,'(/a)') 'VAL0'
DO i=1,nx
WRITE(*,'(a,i3)') 'Interval', i
DO k=1,nidbas+1 ! Spline number
WRITE(*,'(10f12.4)') splx%val0(:,k,i)
END DO
END DO
IF(nlper) THEN
WRITE(*,'(/a)') 'VALC'
DO k=1,nidbas+1 ! Spline number
WRITE(*,'(10f12.4)') splx%valc(:,k)
END DO
END IF
END IF
!
ALLOCATE(xpt(npt))
ALLOCATE(vleft(npt))
ALLOCATE(fun(0:nidbas,0:jdermx)) ! Values and derivatives of all Splines
ALLOCATE(vfun(0:nidbas,0:jdermx,npt))
ALLOCATE(fun1(0:nidbas,0:jdermx)) ! Values and derivatives of all Splines
ALLOCATE(vfun1(0:nidbas,0:jdermx,npt))
CALL RANDOM_NUMBER(xpt)
!===============================================================================
!
! Check def_basfun
!
CALL def_basfun(xpt, splx, vfun, vleft)
!
WRITE(*,'(/a)') 'vector def_basfun versus scalar def_basfun'
WRITE(*,'(a6,a12, 2a6, a12)') 'i', 'x', 'left', 'vleft', 'Max. err'
DO i=1,npt
CALL def_basfun(xpt(i), splx, fun, left)
errfun= MAXVAL(ABS(fun(:,:)-vfun(:,:,i)))
WRITE(*,'(i6,1pe12.4,2i6,1pe12.4)') i, xpt(i), left, vleft(i), errfun
END DO
!
IF(npt.LE.10) THEN
WRITE(*,'(/a)') 'Scalar/vector basfun'
DO i=1,npt
CALL basfun(xpt(i), splx, fun, vleft(i)+1)
WRITE(*,'(a,1pe12.4/10(1pe12.4))') 'x = ', xpt(i), fun(:,:)
WRITE(*,'(10(1pe12.4))') vfun(:,:,i)
END DO
END IF
!===============================================================================
!
! Check basfun
!
CALL basfun(xpt, splx, vfun1, vleft+1)
WRITE(*,'(/a,1pe12.4)') 'vector basfun versus vector def_basun: Max err', &
& MAXVAL(ABS(vfun-vfun1))
!
WRITE(*,'(/a)') 'vector basfun versus scalar basfun'
WRITE(*,'(a6,a12,a12)') 'i', 'x', 'Max. err'
DO i=1,npt
CALL basfun(xpt(i), splx, fun1, vleft(i)+1)
errfun= MAXVAL(ABS(fun1(:,:)-vfun1(:,:,i)))
WRITE(*,'(i6,1pe12.4,1pe12.4)') i, xpt(i), errfun
END DO
!!===============================================================================
!
! Clean up
!
CALL destroy_sp(splx)
DEALLOCATE(xgrid)
DEALLOCATE(xpt)
DEALLOCATE(vleft)
DEALLOCATE(fun)
END PROGRAM main
diff --git a/examples/tcdsmat.f90 b/examples/tcdsmat.f90
index f832abf..0e3f565 100644
--- a/examples/tcdsmat.f90
+++ b/examples/tcdsmat.f90
@@ -1,283 +1,283 @@
!>
!> @file tcdsmat.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 2d PDE using splines and iterative method
!
! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
! exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
USE tcdsmat_mod
USE bsplines
USE cds
USE futils
!
IMPLICIT NONE
INCLUDE 'mpif.h'
!
INTEGER :: nints(2), nidbas(2), ngauss(2), mbess, nterms
DOUBLE PRECISION :: pi, coefx(5), coefy(5)
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:)
DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:)
INTEGER :: mrank, bw0
TYPE(spline2d) :: splxy
TYPE(cds_mat) :: mat
LOGICAL :: readmat, verbose
CHARACTER(len=128) :: file='tcdsmat.h5'
CHARACTER(len=128) :: filein
INTEGER :: fid, fidin
DOUBLE PRECISION :: mem, seconds
DOUBLE PRECISION :: t0, tmat, tbal, tsolv, tgrid, tmumps(2)
INTEGER :: nitmx=100, niter, nssor
DOUBLE PRECISION :: rtolmx=1.0d-6, omega=0.0d0, resid
!
INTEGER :: i, j, ij, dimx, dimy
DOUBLE PRECISION, ALLOCATABLE :: bcoef(:,:)
DOUBLE PRECISION, ALLOCATABLE :: solcal(:,:), solana(:,:), errsol(:,:)
INTEGER, ALLOCATABLE :: dists(:)
!
INTEGER :: ierr, me
TYPE(mumps_mat) :: mat_mumps
!
NAMELIST /newrun/ nints, nidbas, ngauss, mbess, coefx, coefy, &
& nitmx, rtolmx, omega, nssor, readmat, verbose, filein
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Read in data specific to run
!
WRITE(*,'(/a)') 'Prologue ...'
readmat = .FALSE. ! Read matrix and rhs from file
filein = 'mat.h5'
nints = (/8,8/) ! Number of intervals in x, y
nidbas = (/3,3/) ! Degree of splines
ngauss = (/4,4/) ! Number of Gauss points/interval
mbess = 2 ! Exponent of differential problem
nterms = 2 ! Number of terms in weak form
nitmx = 1000 ! Max number of iterations
rtolmx = 1.e-12 ! Max relative tolerance
nssor = 1 ! Number of SSOR precond steps
verbose = .FALSE. ! Output residue at each iteration
coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
READ(*,newrun)
!
! Overwrite some input by reading in "filein" if required
!
IF(readmat) THEN
CALL openf(filein, fidin, mode='r')
CALL getatt(fidin, '/', 'NX', nints(1))
CALL getatt(fidin, '/', 'NY', nints(2))
CALL getatt(fidin, '/', 'NIDBAS1', nidbas(1))
CALL getatt(fidin, '/', 'NIDBAS2', nidbas(2))
CALL getatt(fidin, '/', 'NGAUSS1', ngauss(1))
CALL getatt(fidin, '/', 'NGAUSS2', ngauss(2))
CALL getatt(fidin, '/', 'MBESS', mbess)
END IF
WRITE(*,newrun)
!
! Define grid on x (=radial) & y (=poloidal) axis
!
pi = 4.0d0*ATAN(1.0d0)
ALLOCATE(xgrid(0:nints(1)), ygrid(0:nints(2)))
xgrid(0) = 0.0d0; xgrid(nints(1)) = 1.0d0
CALL meshdist(coefx, xgrid, nints(1))
ygrid(0) = 0.0d0; ygrid(nints(2)) = 2.d0*pi
CALL meshdist(coefy, ygrid, nints(2))
!
! Create hdf5 file
!
CALL creatf(file, fid, 'TCDSMAT Result File', real_prec='d')
CALL attach(fid, '/', 'NX', nints(1))
CALL attach(fid, '/', 'NY', nints(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
! 2.0 Discretize the PDE
!
! Set up spline
!
WRITE(*,'(/a)') 'Discretize the PDE ...'
t0 = seconds()
CALL set_spline(nidbas, ngauss, &
& xgrid, ygrid, splxy, (/.FALSE., .TRUE./) )
!
! Set up CDS matrix for solver
!
CALL mstruct(nidbas, nints, mrank, dists)
bw0=(nidbas(1)+1)*nints(2) ! Half band including all zero diagonals
CALL init(mrank, dists, nterms, mat, bw0=bw0)
WRITE(*,'(a,4i8)') 'rank, kl, ku, bw0 = ', mat%rank, mat%kl, mat%ku, bw0
WRITE(*,'(i4,a/(10i8))') mat%ndiags, ' diagonals:', mat%dists
!
! FE matrix assembly and apply BC
!
IF(readmat) THEN
CALL getmat(fidin, '/MAT1', mat)
ELSE
CALL dismat(splxy, mat)
CALL ibcmat(mat, nints(2), nidbas(1))
END IF
!
WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
!
! Assembly RHS and apply BC
!
ALLOCATE(rhs(mrank), sol(mrank))
!
IF(readmat) THEN
CALL getarr(fidin, '/RHS', rhs)
ELSE
CALL disrhs(mbess, splxy, rhs)
CALL ibcrhs(rhs, nints(2))
CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
END IF
tmat = seconds() - t0
!===========================================================================
! 3.0 Diagonal balance of matrix
!
WRITE(*,'(/a)') 'Diagonal balance of matrix ...'
tbal = seconds()
IF( .NOT. readmat ) THEN
CALL diagbal(mat)
CALL putmat(fid,'/MAT1', mat, 'CDS matrix with BC')
END IF
rhs = mat%bal * rhs
tbal = seconds()-tbal
!===========================================================================
! 4.0 Analytical solutions
!
ALLOCATE(solcal(0:nints(1),0:nints(2)))
ALLOCATE(solana(0:nints(1),0:nints(2)))
ALLOCATE(errsol(0:nints(1),0:nints(2)))
DO i=0,nints(1)
DO j=0,nints(2)
solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
END DO
END DO
!===========================================================================
! 5.0 Direct solve with MUMPS
!
!
WRITE(*,'(/a)') 'Solve the linear system using MUMPS ...'
!
tmumps(1) = seconds()
CALL cds2mumps(mat, mat_mumps)
CALL factor(mat_mumps, debug=.FALSE.)
tmumps(2) = seconds()
sol = rhs
CALL bsolve(mat_mumps, sol, debug=.FALSE.)
sol = mat%bal * sol
sol(1:nints(2)-1) = sol(nints(2)) ! Unicity
tmumps(1:2) = seconds()-tmumps(1:2)
!
tgrid = seconds()
dimx = splxy%sp1%dim
dimy = splxy%sp2%dim
ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
!
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,nints(2)) + i*nints(2) + 1
bcoef(i,j) = sol(ij)
END DO
END DO
CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef)
errsol = solana - solcal
tgrid = seconds() - tgrid
!
PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana)
!!$ WRITE(*,'(a,2(1pe15.6))') 'Relative discretization errors', &
!!$ & norm2(errsol) / norm2(solana)
!===========================================================================
! 5.0 Solve the linear system using CG
!
WRITE(*,'(/a)') 'Solve the linear system using CG ...'
!
tsolv = seconds()
sol(:) = 0.0d0 ! Initial guest for solution
IF( nssor .EQ. 0 ) THEN
CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, &
& verbose=verbose)
ELSE
CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, &
& verbose=verbose, nssor=nssor)
END IF
sol = mat%bal * sol
sol(1:nints(2)-1) = sol(nints(2)) ! Unicity
tsolv = seconds()-tsolv
!
DO j=0,dimy-1
DO i=0,dimx-1
ij = MODULO(j,nints(2)) + i*nints(2) + 1
bcoef(i,j) = sol(ij)
END DO
END DO
CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef)
errsol = solana - solcal
!
PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana)
!===========================================================================
! 9.0 Epilogue
!
CALL putarr(fid, '/SOL', sol)
CALL putarr(fid, '/xgrid', xgrid, 'r')
CALL putarr(fid, '/ygrid', ygrid, '\theta')
CALL putarr(fid, '/sol', solcal, 'Solutions')
CALL putarr(fid, '/solana', solana,'Exact solutions')
CALL putarr(fid, '/errors', errsol, 'Errors')
!
WRITE(*,'(/a)') '---'
WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
WRITE(*,'(a,1pe12.3)') 'Matrice balancing time (s) ', tbal
WRITE(*,'(a,2(1pe12.3))') 'MUMPS solver time (s) ', tmumps
WRITE(*,'(a,1pe12.3)') 'Solution at grid time (s) ', tgrid
WRITE(*,'(a,i8,2(1pe12.3))') 'nits, resid, t(s)', niter, resid, tsolv
!
DEALLOCATE(xgrid,ygrid)
DEALLOCATE(rhs, sol)
DEALLOCATE(dists)
DEALLOCATE(bcoef)
DEALLOCATE(solcal,solana,errsol)
CALL destroy_sp(splxy)
CALL destroy(mat)
CALL closef(fid)
IF(readmat) THEN
CALL closef(fidin)
END IF
!
CALL mpi_finalize(ierr)
!===========================================================================
END PROGRAM main
diff --git a/examples/tcdsmat_mod.f90 b/examples/tcdsmat_mod.f90
index 776e422..9be9434 100644
--- a/examples/tcdsmat_mod.f90
+++ b/examples/tcdsmat_mod.f90
@@ -1,635 +1,635 @@
!>
!> @file tcdsmat_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE tcdsmat_mod
IMPLICIT NONE
!
INTERFACE
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
INTERFACE norm2
MODULE PROCEDURE norm2_1d, norm2_2d
END INTERFACE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE mstruct(p, n, rank, dist)
!
! It is assumed that:
! . 2nd dimension is number first
! . 2nd dimension is periodic
!
INTEGER, INTENT(in) :: p(2), n(2)
INTEGER, INTENT(out) :: rank
INTEGER, ALLOCATABLE :: dist(:)
!
INTEGER, ALLOCATABLE :: pvect(:,:)
INTEGER :: kl, ku, i
!
rank = (n(1)+p(1))*n(2) ! Rank of the FE matrix
ku = (p(1)+1)*(2*p(2)+1)-1
kl = ku
ALLOCATE(pvect(0:2*p(2),0:p(1)))
IF( ALLOCATED(dist)) DEALLOCATE(dist)
ALLOCATE(dist(-kl:ku))
!
! Upper (North) points
pvect(0:p(2),0) = (/(i,i=0,p(2))/)
!
! Lower (South) points and periodicity of 2nd dim.
DO i=1,p(2)
pvect(p(2)+i,0) = n(2)-pvect(p(2)-i+1,0)
END DO
!
! Shift by N2 for points on the right (West) side
DO i=1,p(1)
pvect(:,i) = pvect(:,i-1)+n(2)
END DO
!
! Super-diagonals including the diagonal
dist(0:ku) = RESHAPE(pvect, (/ku+1/))
!
! Sub-diagonals
DO i=-1,-kl,-1
dist(i) = -dist(-i)
END DO
!
DEALLOCATE(pvect)
END SUBROUTINE mstruct
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
!
! Assembly of FE matrix mat using spline spl
!
USE bsplines
USE cds
!
TYPE(spline2d), INTENT(in) :: spl
TYPE(cds_mat), INTENT(inout) :: mat
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2
INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!
INTEGER :: kterms ! Number of terms in weak form
INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
! Weak form
!
kterms = mat%nterms
ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
!
ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
ALLOCATE(fun2(0:nidbas2,0:1)) !
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
DO iterm=1,kterms
DO iw1=0,nidbas1 ! Weight function in dir 1
igw1 = i+iw1
DO iw2=0,nidbas2 ! Weight function in dir 2
igw2 = MODULO(j+iw2-1, n2) + 1
irow = igw2 + (igw1-1)*n2
DO it1=0,nidbas1 ! Test function in dir 1
igt1 = i+it1
DO it2=0,nidbas2 ! Test function in dir 2
igt2 = MODULO(j+it2-1, n2) + 1
jcol = igt2 + (igt1-1)*n2
contrib = fun1(iw1,iderw(iterm,1)) * &
& fun2(iw2,iderw(iterm,2)) * &
& coefs(iterm) * &
& fun2(it2,idert(iterm,2)) * &
& fun1(it1,idert(iterm,1)) * &
& wg1(ig1) * wg2(ig2)
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
DEALLOCATE(idert, iderw, coefs)
!
CONTAINS
SUBROUTINE coefeq(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
c(1) = x !
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcmat(mat, ny, px)
!
! Apply BC on matrix
!
USE cds
IMPLICIT NONE
TYPE(cds_mat), INTENT(inout) :: mat
INTEGER, INTENT(in) :: ny, px
!
INTEGER :: kl, ku, n, bw0, i, j
DOUBLE PRECISION :: zsum(mat%rank), arr(mat%rank)
!===========================================================================
! 1.0 Prologue
!
kl = mat%kl
ku = mat%ku
n = mat%rank
mat%ny = ny
!
! Size of row ny and column ny
!
bw0 = SIZE(mat%rowv)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum on the NY-th row
! Store the sums at row ny in MAT%ROWV
!
zsum(1:n) = 0.0d0
DO i=1,ny
CALL getrow(mat, i, arr)
zsum(1:bw0) = zsum(1:bw0) + arr(1:bw0)
IF( i .LE. ny ) THEN ! Clear rows 1:(ny-1)
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END IF
END DO
mat%rowv(1:bw0) = zsum(1:bw0) ! row ny
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130)
!
! The horizontal sum on the NY-th column
! The NY-th row of matrix was stored in mat%rowv
! Store the sums ar column ny at MAT%COLH
!
zsum(1:n) = 0.0d0
DO j=1,ny
CALL getcol(mat, j, arr)
zsum(ny) = zsum(ny) + mat%rowv(j)
zsum(ny+1:bw0) = zsum(ny+1:bw0) + arr(ny+1:bw0)
IF( j .NE. ny ) THEN ! Clear columns 1:(ny-1)
mat%rowv(j) = 0.0d0
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END IF
END DO
mat%rowv(ny) = 0.0d0 ! Its value is now in mat%colh(ny)
arr = 0.0d0
CALL putcol(mat, ny, arr)
CALL putrow(mat, ny, arr)
mat%colh(1:bw0) = zsum(1:bw0) ! column ny
!
! Move the diagonal term from mat%colh back to main diagonal
!
CALL putele(mat, ny, ny, mat%colh(ny))
mat%colh(ny) = 0.0d0
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130)
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%colh(1:130)
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%val(1:ny,0)
!===========================================================================
! 3.0 Dirichlet on right boundary
!
DO j = n, n-ny+1, -1
arr = 0.0d0; arr(j) = 1.0d0
CALL putcol(mat, j, arr)
END DO
!
DO i = n, n-ny+1, -1
arr = 0.0d0; arr(i) = 1.0d0
CALL putrow(mat, i, arr)
END DO
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag 1', mat%val(n-ny-1:n,1)
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag 0', mat%val(n-ny:n,0)
!!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag -1', mat%val(n-ny-1:n,-1)
!===========================================================================
! 9.0 Epilogue
!
END SUBROUTINE ibcmat
!===========================================================================
SUBROUTINE disrhs(mbess, spl, rhs)
!
! Assembly the RHS using 2d spline spl
!
USE bsplines
INTEGER, INTENT(in) :: mbess
TYPE(spline2d), INTENT(in) :: spl
DOUBLE PRECISION, INTENT(out) :: rhs(:)
!
INTEGER :: n1, nidbas1, ndim1, ng1
INTEGER :: n2, nidbas2, ndim2, ng2
INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
DOUBLE PRECISION:: contrib
!===========================================================================
! 1.0 Prologue
!
! Properties of spline space
!
CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
!
ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
! Gauss quadature
!
CALL get_gauss(spl%sp1, ng1)
CALL get_gauss(spl%sp2, ng2)
WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
ALLOCATE(xg1(ng1), wg1(ng1))
ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
! 2.0 Assembly loop
!
nrank = SIZE(rhs)
rhs(1:nrank) = 0.0d0
!
DO i=1,n1
CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
DO ig1=1,ng1
CALL basfun(xg1(ig1), spl%sp1, fun1, i)
DO j=1,n2
CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
DO ig2=1,ng2
CALL basfun(xg2(ig2), spl%sp2, fun2, j)
contrib = wg1(ig1)*wg2(ig2) * &
& rhseq(xg1(ig1),xg2(ig2), mbess)
DO k1=0,nidbas1
i1 = i+k1
DO k2=0,nidbas2
j2 = MODULO(j+k2-1,n2) + 1
ij = j2 + (i1-1)*n2
rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
END DO
END DO
END DO
END DO
END DO
END DO
!===========================================================================
! 9.0 Epilogue
!
DEALLOCATE(xg1, wg1, fun1)
DEALLOCATE(xg2, wg2, fun2)
!
CONTAINS
DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
DOUBLE PRECISION, INTENT(in) :: x1, x2
INTEGER, INTENT(in) :: m
rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
END FUNCTION rhseq
END SUBROUTINE disrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE ibcrhs(rhs, ny)
!
! Apply BC on RHS
!
DOUBLE PRECISION, INTENT(inout) :: rhs(:)
INTEGER, INTENT(in) :: ny
!
INTEGER :: nrank
DOUBLE PRECISION :: zsum
!===========================================================================
! 1.0 Prologue
!
nrank = SIZE(rhs,1)
!===========================================================================
! 2.0 Unicity at the axis
!
! The vertical sum
!
zsum = SUM(rhs(1:ny))
rhs(ny) = zsum
rhs(1:ny-1) = 0.0d0
!===========================================================================
! 3.0 Dirichlet on right boundary
!
rhs(nrank-ny+1:nrank) = 0.0d0
END SUBROUTINE ibcrhs
!===========================================================================
SUBROUTINE cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, nit, verbose, &
& nssor)
!
! Preconditionned Conjugate Gradient solver
!
USE cds
TYPE(cds_mat) :: mat
DOUBLE PRECISION, INTENT(in) :: rhs(:), omega, rtolmx
INTEGER, INTENT(in) :: nitmx
DOUBLE PRECISION, INTENT(inout) :: sol(:)
DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid
INTEGER, OPTIONAL, INTENT(out) :: nit
INTEGER, OPTIONAL, INTENT(in) :: nssor
LOGICAL, OPTIONAL, INTENT(in) :: verbose
!
DOUBLE PRECISION, DIMENSION(SIZE(rhs,1)) :: wr, wz, wp, wq
DOUBLE PRECISION :: bnrm2, residue, rho0, rho1, alpha, beta
INTEGER :: it
!
bnrm2 = SQRT(DOT_PRODUCT(rhs,rhs)) ! Euclidian norm of RHS
it = 0
wr = rhs-vmx(mat,sol)
!
!... Iteration loop (see fig. 2.5, p.15 of "Templates...")
DO
it = it+1
IF( PRESENT(nssor) ) THEN
CALL psolve(mat, wz, wr, omega, nssor)
ELSE
wz = wr
END IF
rho1 = DOT_PRODUCT(wr,wz)
IF( it .EQ. 1 ) THEN
wp = wz
ELSE
beta = rho1/rho0
wp = wz + beta*wp
END IF
wq = vmx(mat,wp)
alpha = rho1 / DOT_PRODUCT(wp,wq)
sol = sol + alpha*wp
wr = wr - alpha*wq
residue = SQRT(DOT_PRODUCT(wr,wr)) / bnrm2
IF( PRESENT(verbose) ) THEN
IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', it, residue
END IF
IF( residue .LE. rtolmx .OR. it .GE. nitmx) EXIT
rho0 = rho1
END DO
IF(PRESENT(resid)) resid = residue
IF(PRESENT(nit)) nit = it
END SUBROUTINE cg
!===========================================================================
SUBROUTINE psolve(mat, x, b, omega, niter_in)
!
! Preconditionners
!
USE cds
TYPE(cds_mat) :: mat
DOUBLE PRECISION, INTENT(out) :: x(:)
DOUBLE PRECISION, INTENT(in) :: b(:)
DOUBLE PRECISION, INTENT(in) :: omega
INTEGER, OPTIONAL, INTENT(in) :: niter_in
!
INTEGER :: niter
DOUBLE PRECISION :: rtolmx
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
! 1. No-preconditionning
!
IF( omega .LT. 0.0d0 ) THEN
x = b
RETURN
END IF
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
! 2. SSOR Preconditionner
!
niter = 1
rtolmx = 1.d-6
IF(PRESENT(niter_in)) THEN
niter = niter_in
END IF
CALL ssor(mat, b, omega, niter, rtolmx, x)
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END SUBROUTINE psolve
!
!===========================================================================
SUBROUTINE ssor(mat, b, omega, nitmx, rtolmx, x, resid, nit, verbose)
!
! Solve Ax = b using SSOR method
!
USE cds
TYPE(cds_mat) :: mat
DOUBLE PRECISION, INTENT(out) :: x(:)
DOUBLE PRECISION, INTENT(in) :: b(:)
DOUBLE PRECISION, INTENT(in) :: omega, rtolmx
INTEGER, INTENT(in) :: nitmx
DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid
INTEGER, OPTIONAL, INTENT(out) :: nit
LOGICAL, OPTIONAL, INTENT(in) :: verbose
!
INTEGER :: n, iter
INTEGER :: k, i, j, d, bw0, ny
DOUBLE PRECISION :: omega1, bnrm2, residue
DOUBLE PRECISION :: rhs(SIZE(x))
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
! 1. Initialization
n = SIZE(x)
bw0 = SIZE(mat%rowv)
ny = mat%ny
bnrm2 = norm2(b) ! Euclidian norm of RHS
omega1 = 1.0d0-omega
iter = 0
DO
iter = iter+1
!
! 2. Forward SOR
! Set RHS
rhs = omega*b ! rhs <- omega*b
IF( iter .GT. 1 ) THEN
rhs = rhs + omega1*x ! rhs <- rhs + (1-omega)*x0
DO k=1,mat%ku ! rhs <- rhs - omega*U*x0
d = mat%dists(k)
DO i=MAX(1,1-d),MIN(n,n-d)
rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d)
END DO
END DO
IF( ny .NE. 0 ) THEN ! Contributions from unicity BC
rhs(ny) = rhs(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0))
END IF
END IF
!
! Solve (1+omega*L) x = rhs
x = rhs
IF( ny .NE. 0 ) THEN ! Contributions from unicity BC
rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny)
END IF
DO i=ny+1,n
DO k=-1,-mat%kl,-1
d = mat%dists(k)
j=i+d
IF( j.LE.0 ) EXIT
x(i) = x(i) - omega*mat%val(i,k)*x(j)
END DO
END DO
!
! 3. Backward SOR
! Set RHS
rhs = omega*b + omega1*x ! rhs <- omega*b + (1-omega)*x0
IF( ny .NE. 0 ) THEN ! Contributions from unicity BC
rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny)
END IF
DO k=-mat%kl,-1 ! rhs <- rhs - omega*L*x0
d = mat%dists(k)
DO i=MAX(1,1-d),MIN(n,n-d)
rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d)
END DO
END DO
!
! Solve (1+omega*U) x = rhs
x = rhs
DO i=n-1,ny+1,-1
DO k=1,mat%ku
d = mat%dists(k)
j = i+d
IF( j.GT.n ) EXIT
x(i) = x(i) - omega*mat%val(i,k)*x(j)
END DO
END DO
IF( ny .NE. 0 ) THEN ! Contributions from unicity BC
x(ny) = x(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0))
END IF
!
! 4. Compute residue
!
IF( PRESENT(resid) ) THEN
residue = norm2(b-vmx(mat,x)) / bnrm2
IF(PRESENT(verbose)) THEN
IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', iter, residue
END IF
IF( residue .LT. rtolmx ) EXIT
END IF
!
IF( iter .GE. nitmx ) EXIT
END DO ! End of SSOR iterations
IF(PRESENT(nit)) nit = iter
IF(PRESENT(resid)) resid = residue
END SUBROUTINE ssor
!===========================================================================
SUBROUTINE diagbal(mat)
!
! Diagonal matrix balancing: store D^(-1/2) in mat%bal
!
USE cds
TYPE(cds_mat) :: mat
INTEGER :: n, bw0, ny, d, i, k
DOUBLE PRECISION :: diag(mat%rank)
!
n = mat%rank
ny = mat%ny
bw0 = SIZE(mat%colh)
diag(1:n) = mat%val(1:n,0)
IF( MINVAL(diag) .LE. 0.0d0 ) THEN
WRITE(*,'(a)') 'Diagonal elements of matrix are not stricly positive!'
STOP
END IF
diag(1:n) = 1.0d0/SQRT(diag(1:n))
!
! Scale the matrix
!$OMP parallel do private (k,d,i)
DO k=-mat%kl,mat%ku
d = mat%dists(k)
DO i=MAX(1,1-d),MIN(n,n-d)
mat%val(i,k) = diag(i)*diag(i+d)*mat%val(i,k)
END DO
END DO
!$OMP end parallel do
!
! The ny^th column and row
IF( ny.NE.0 ) THEN
mat%rowv(1:bw0) = diag(1:bw0)*diag(ny)*mat%rowv(1:bw0)
mat%colh(1:bw0) = diag(ny)*diag(1:bw0)*mat%colh(1:bw0)
END IF
!
! Save D^(-1/2)
mat%bal(:) = diag(:)
END SUBROUTINE diagbal
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
FUNCTION norm2_1d(x)
!
! Compute the 2-norm of 1d array
!
DOUBLE PRECISION :: x(:)
DOUBLE PRECISION :: norm2_1d
norm2_1d = SQRT(SUM(x*x))
END FUNCTION norm2_1d
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
FUNCTION norm2_2d(x)
!
! Compute the 2-norm of 2d array
!
DOUBLE PRECISION :: x(:,:)
DOUBLE PRECISION :: norm2_2d
norm2_2d = SQRT(SUM(x*x))
END FUNCTION norm2_2d
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE tcdsmat_mod
diff --git a/examples/test_kron.f90 b/examples/test_kron.f90
index ee9d5fe..52a5a2c 100644
--- a/examples/test_kron.f90
+++ b/examples/test_kron.f90
@@ -1,125 +1,125 @@
!>
!> @file test_kron.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test Kronecker product for both GE and CSR versions.
!
USE iso_fortran_env, ONLY : rkind => real64
USE matrix, ONLY : gemat, init, kron, vmx
USE csr, ONLY : csr_mat, init, full_to_csr, kron, vmx
IMPLICIT NONE
!
TYPE(gemat) :: mata, matb, matc
TYPE(gemat) :: matu1
TYPE(csr_mat) :: mata_csr, matb_csr, matc_csr
INTEGER, PARAMETER :: m=3, n=2
REAL(rkind) :: a(m*m)=[ 1.1, 0.0, 0.0, 0.2, 1.5, 1.0, 0.5, 0.0, 1.0 ]
REAL(rkind) :: b(n*n)=[ 2.0, 1.0, 0.0, 3.0 ]
REAL(rkind), TARGET :: u(m,n), uu(m,n), v(m,n)
REAL(rkind), POINTER :: u1d(:), uu1d(:)
INTEGER :: i, s, e
!
CALL init(m, 0, mata)
CALL init(n, 0, matb)
mata%val(1:m,1:m) = RESHAPE(a, [m,m])
matb%val(1:n,1:n) = RESHAPE(b, [n,n])
!
CALL printmat_ge('Matrix A', mata)
CALL printmat_ge('Matrix B', matb)
!
u1d(1:m*n) => u ! u1d = vec(u)
u1d = [ (REAL(i,rkind), i=1,m*n) ]
CALL printmat('Array U', u)
!
! Compute (A.U).B^T
!
CALL init(n, 0, matu1, mrows=m)
v = TRANSPOSE(vmx(matb, TRANSPOSE(vmx(mata, u))))
CALL printmat('(A.U).B^T', v)
!
! Compute (BxA).vec(U)
!
CALL kron(matb, mata, matc)
uu1d(1:m*n) => uu
uu1d = vmx(matc, u1d)
CALL printmat_ge('Matrix C', matc)
CALL printmat('(BxA).vec(U)', uu)
!----------------------------------------------------------------------
!
! Using CSR matrices
!
CALL full_to_csr(mata%val, mata_csr)
CALL full_to_csr(matb%val, matb_csr)
CALL printmat_csr('Matrix A', mata_csr)
CALL printmat_csr('Matrix B', matb_csr)
!
CALL kron(matb_csr, mata_csr, matc_csr)
uu1d = vmx(matc_csr, u1d)
!
CALL printmat_csr('Matrix C', matc_csr)
CALL printmat('(BxA).vec(U)', uu)
!
CONTAINS
SUBROUTINE printmat(str, a)
CHARACTER(len=*) :: str
REAL(rkind), INTENT(in) :: a(:,:)
INTEGER :: i,m,n
WRITE(*,'(/a)') TRIM(str)
m=SIZE(a,1)
n=SIZE(a,2)
DO i=1,m
WRITE(*,'(12f8.3)') a(i,:)
END DO
END SUBROUTINE printmat
SUBROUTINE printmat_ge(str, a)
CHARACTER(len=*) :: str
TYPE(gemat) :: a
INTEGER :: i
WRITE(*,'(/a)') TRIM(str)
DO i=1,a%mrows
WRITE(*,'(12f8.3)') a%val(i,:)
END DO
END SUBROUTINE printmat_ge
SUBROUTINE printmat_csr(str, a)
CHARACTER(len=*) :: str
TYPE(csr_mat) :: a
INTEGER :: i, s, e
REAL(rkind) :: arow(a%ncols)
WRITE(*,'(/a,a,3i4)') TRIM(str), ': m, n, nnz', a%mrows, a%ncols, a%nnz
DO i=1,a%mrows
arow = 0.0_rkind
s = a%irow(i)
e = a%irow(i+1)-1
arow(a%cols(s:e)) = a%val(s:e)
WRITE(*,'(12f8.3)') arow
END DO
IF(SIZE(a%idiag) .GT. 0) THEN
WRITE(*,'(a,(20i4))') 'idiag =', a%idiag
WRITE(*,'(a,12f8.3)') 'diag = ', a%val(a%idiag)
END IF
!
END SUBROUTINE printmat_csr
END PROGRAM main
diff --git a/examples/test_pwsmp.f90 b/examples/test_pwsmp.f90
index 0537296..29ebbf1 100644
--- a/examples/test_pwsmp.f90
+++ b/examples/test_pwsmp.f90
@@ -1,287 +1,287 @@
!>
!> @file test_pwsmp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!!$ USE futils
IMPLICIT NONE
INCLUDE 'mpif.h'
!
INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD
INTEGER :: l, i, lun=99
INTEGER :: nrank, nnz, s, e, nrank_loc, nnz_loc, nnz_sum
INTEGER :: istart, iend
INTEGER, ALLOCATABLE :: irow(:), cols(:)
INTEGER, ALLOCATABLE :: irow_loc(:), cols_loc(:)
DOUBLE PRECISION, ALLOCATABLE :: val(:), val_loc(:)
DOUBLE PRECISION, ALLOCATABLE :: rhs(:), rhs_loc(:)
DOUBLE PRECISION, ALLOCATABLE :: sol(:), sol_loc(:)
DOUBLE PRECISION :: mem
CHARACTER(len=128) :: fname = "mat.dat"
DOUBLE PRECISION :: mem_loc, mem_min, mem_max
DOUBLE PRECISION :: err, err_max, err_norm
DOUBLE PRECISION :: t0, tfact, tsolv
INTEGER :: it, nits=100
!
! PWSMP vars
!
DOUBLE PRECISION :: dparm(64)
INTEGER :: iparm(64)
INTEGER, ALLOCATABLE :: perm(:), invp(:)
!
INTEGER :: mrp ! just a placeholder in this program
DOUBLE PRECISION :: aux, diag ! just placeholders in this program
INTEGER :: naux=0, nrhs=1
!===========================================================================
! 1.0 Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_size(comm, npes, ierr)
CALL mpi_comm_rank(comm, me, ierr)
!===========================================================================
! 2.0 Read matrix
!
! File header
IF( command_argument_count() > 0 ) THEN
CALL get_command_argument(1, fname, l, ierr)
END IF
OPEN(unit=lun, file=fname, form="unformatted")
READ(lun) nrank, nnz
IF(me.EQ.0) WRITE(*,'(a,3i16)') 'npes, nrank, nnz', npes, nrank, nnz
!
! Matrix partition
CALL dist1d(comm, 1, nrank, istart, nrank_loc)
iend = istart+nrank_loc-1
WRITE(*,'(a,i3.3,a,2i12)') 'PE', me, ':istart, iend', istart, iend
ALLOCATE(irow_loc(nrank_loc+1))
!
! Read irow
ALLOCATE(irow(nrank+1))
READ(lun) irow
nnz_loc = irow(iend+1)-irow(istart)
CALL mpi_reduce(nnz_loc, nnz_sum, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr)
IF(me.EQ.0) THEN
PRINT*, 'nnz_sum', nnz_sum
END IF
irow_loc(:) = irow(istart:iend+1) ! Still unshifted
DEALLOCATE(irow)
!
ALLOCATE(cols_loc(nnz_loc))
ALLOCATE(val_loc(nnz_loc))
ALLOCATE(rhs_loc(nrank_loc))
ALLOCATE(sol_loc(nrank_loc))
!
s = irow_loc(1)
e = irow_loc(nrank_loc+1)-1
irow_loc(:) = irow_loc(:)-s+1 ! Shifted relative irow
WRITE(*,'(a,i3.3,a,3i12)') 'PE', me, ':s, e, nnz_loc', s, e, nnz_loc
!
! Read cols
ALLOCATE(cols(nnz))
READ(lun) cols
cols_loc(:) = cols(s:e)
DEALLOCATE(cols)
!
! Read vals
ALLOCATE(val(nnz))
READ(lun) val
val_loc(:) = val(s:e)
DEALLOCATE(val)
!
! Read RHS
ALLOCATE(rhs(nrank))
READ(lun) rhs
rhs_loc(:) = rhs(istart:iend)
DEALLOCATE(rhs)
!
!!$ mem_loc = mem()
!!$ CALL minmax_r(mem_loc, comm, 'mem used (MB) after matrix read')
!===========================================================================
! 3.0 Call PWSMP
!
! Initializing of PWSMP.
!
!!$ CALL pwsmp_initialize
ALLOCATE(invp(nrank), perm(nrank))
!
! Fill 'iparm' and 'dparm' arrays with default values.
iparm(1:3) = 0
CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
& rhs_loc, nrank_loc, nrhs, &
& aux, naux, mrp, iparm, dparm)
IF(iparm(64).NE.0) THEN
PRINT*, 'WSMP init failed with iparm(64) =', iparm(64)
CALL mpi_abort(comm, iparm(64), ierr)
ELSE
IF(me.EQ.0) PRINT*, 'WSMP init ok'
END IF
!
! Ordering
iparm(2) = 1
iparm(3) = 1
CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
& rhs_loc, nrank_loc, nrhs, &
& aux, naux, mrp, iparm, dparm)
IF(iparm(64).NE.0) THEN
PRINT*, 'WSMP ordering failed with iparm(64) =', iparm(64)
CALL mpi_abort(comm, iparm(64), ierr)
ELSE
IF(me.EQ.0) PRINT*, 'WSMP ordering ok'
END IF
!
! Symbolic factorization
iparm(2) = 2
iparm(3) = 2
CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
& rhs_loc, nrank_loc, nrhs, &
& aux, naux, mrp, iparm, dparm)
IF(iparm(64).NE.0) THEN
PRINT*, 'WSMP symbolic failed with iparm(64) =', iparm(64)
CALL mpi_abort(comm, iparm(64), ierr)
ELSE
IF(me.EQ.0) PRINT*, 'WSMP symbolic ok'
END IF
IF(me.EQ.0) THEN
PRINT *,'Number of nonzeros in factor L = 1000 X ',iparm(24)
PRINT *,'Number of FLOPS in factorization = ',dparm(23)
PRINT *,'Double words needed to factor on 0 = 1000 X ',iparm(23)
END IF
!
! Cholesky factorizarion
iparm(2) = 3
iparm(3) = 3
t0 = mpi_wtime()
CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
& rhs_loc, nrank_loc, nrhs, &
& aux, naux, mrp, iparm, dparm)
tfact = mpi_wtime()-t0
IF(iparm(64).NE.0) THEN
PRINT*, 'WSMP Choleski failed with iparm(64) =', iparm(64)
CALL mpi_abort(comm, iparm(64), ierr)
ELSE
IF(me.EQ.0) PRINT*, 'WSMP Choleski ok'
END IF
!
! Backsolve
t0 = mpi_wtime()
DO it=1,nits
sol_loc=rhs_loc
iparm(2) = 4
iparm(3) = 4
CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
& sol_loc, nrank_loc, nrhs, &
& aux, naux, mrp, iparm, dparm)
END DO
rhs_loc=sol_loc
tsolv = (mpi_wtime()-t0)/REAL(nits,8)
IF(iparm(64).NE.0) THEN
PRINT*, 'WSMP backsolve failed with iparm(64) =', iparm(64)
CALL mpi_abort(comm, iparm(64), ierr)
ELSE
IF(me.EQ.0) PRINT*, 'WSMP backsolve ok'
END IF
!
! Iterative refinement
iparm(2) = 5
iparm(3) = 5
CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
& rhs_loc, nrank_loc, nrhs, &
& aux, naux, mrp, iparm, dparm)
IF(iparm(64).NE.0) THEN
PRINT*, 'WSMP refinement failed with iparm(64) =', iparm(64)
CALL mpi_abort(comm, iparm(64), ierr)
ELSE
IF(me.EQ.0) PRINT*, 'WSMP refinement ok'
END IF
!
!!$ mem_loc = mem()
!!$ CALL minmax_r(mem_loc, comm, 'mem used (MB) after PWSMP')
!===========================================================================
! 4.0 Check SOL
!
! Read SOL
ALLOCATE(sol(nrank))
READ(lun) sol
sol_loc(:) = sol(istart:iend)
DEALLOCATE(sol)
PRINT*, 'Comp. sol', SUM(rhs_loc)
PRINT*, 'Exact sol', SUM(sol_loc)
!
err=MAXVAL(ABS(sol_loc-rhs_loc))
CALL mpi_reduce(err, err_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
IF(me.EQ.0) THEN
PRINT*, 'Max. error', err_max
END IF
rhs_loc = rhs_loc-sol_loc
err = DOT_PRODUCT(rhs_loc,rhs_loc)
CALL mpi_reduce(err, err_norm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, comm, ierr)
IF(me.EQ.0) THEN
PRINT*, 'Norm of error', SQRT(err_norm)
END IF
!
!!$ mem_loc = mem()
!!$ CALL minmax_r(mem_loc, comm, 'mem used (MB)')
!===========================================================================
! 9.0 Epilogue
!
CALL minmax_r(tfact, comm, 'Factorisation time(s)')
CALL minmax_r(tsolv, comm, ' Backsolve time(s)')
CALL mpi_finalize(ierr)
!
CONTAINS
SUBROUTINE dist1d(comm, s0, ntot, s, nloc)
!
! 1d distribute ntot elements, returns offset s and local number of
! elements nloc.
!
IMPLICIT NONE
INCLUDE 'mpif.h'
INTEGER, INTENT(in) :: s0, ntot
INTEGER, INTENT(out) :: s, nloc
INTEGER :: comm, me, npes, ierr, naver, rem
!
CALL MPI_COMM_SIZE(comm, npes, ierr)
CALL MPI_COMM_RANK(comm, me, ierr)
naver = ntot/npes
rem = MODULO(ntot,npes)
s = s0 + MIN(rem,me) + me*naver
nloc = naver
IF( me.LT.rem ) nloc = nloc+1
END SUBROUTINE dist1d
!
SUBROUTINE minmax_r(x, comm, str)
CHARACTER(len=*), INTENT(in) :: str
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: comm
INTEGER :: me, ierr
DOUBLE PRECISION :: xmin, xmax
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
END IF
END SUBROUTINE minmax_r
!
END PROGRAM main
diff --git a/examples/tlocintv.f90 b/examples/tlocintv.f90
index 0208d29..a6970c2 100644
--- a/examples/tlocintv.f90
+++ b/examples/tlocintv.f90
@@ -1,140 +1,140 @@
!>
!> @file tlocintv.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Optimization of locintv
!
USE bsplines
IMPLICIT NONE
INTEGER :: nx, nidbas, ngauss, np, nits
DOUBLE PRECISION :: a, b, coefs(5)
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
TYPE(spline1d) :: splx
!
INTEGER :: i, nerrs, it
DOUBLE PRECISION :: t0, t1, seconds, tscal, tscal_new, tvec, tvec_new
DOUBLE PRECISION, ALLOCATABLE :: xp(:)
INTEGER, ALLOCATABLE :: left(:)
!
INTERFACE
SUBROUTINE meshdist(coefs, x, nx)
DOUBLE PRECISION, INTENT(in) :: coefs(5)
INTEGER, INTENT(iN) :: nx
DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
END SUBROUTINE meshdist
END INTERFACE
!
NAMELIST /newrun/ nx, nidbas, ngauss, np, nits, a, b, coefs
!===========================================================================
! Read in data
!
nx = 8 ! Number oh intevals in x
nidbas = 3 ! Degree of splines
ngauss = 4 ! Number of Gauss points/interval
np = 10 ! Number of random points in [a,b]
nits = 1000000
a = 0.0
b = 1.0
coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
! see function FDIST in MESHDIST
!
READ(*,newrun)
WRITE(*,newrun)
!
! Define grid on x axis
!
ALLOCATE(xgrid(0:nx))
xgrid(0) = a
xgrid(nx) = b
CALL meshdist(coefs, xgrid, nx)
WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx)
WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
!
! Set up spline
!
CALL set_spline(nidbas, ngauss, xgrid, splx)
WRITE(*,'(a,l1)') 'Is mesh equidistant? ', splx%nlequid
!
! Test locintv
!
ALLOCATE(xp(np))
ALLOCATE(left(np))
xp(:) = (b-a)*xp(:) + a
tscal = 0.0d0
tscal_new = 0.0d0
tvec = 0.0d0
tvec_new = 0.0d0
!
nerrs = 0
DO it=1,nits
CALL RANDOM_NUMBER(xp)
t0 = seconds()
DO i=1,np
CALL locintv_old(splx, xp(i), left(i))
END DO
tscal = tscal + seconds()-t0
nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
!
t0 = seconds()
DO i=1,np
CALL locintv(splx, xp(i), left(i))
END DO
tscal_new = tscal_new + seconds()-t0
nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
!
t0 = seconds()
CALL locintv_old(splx, xp, left)
tvec = tvec + seconds()-t0
nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
!
t0 = seconds()
CALL locintv(splx, xp, left)
tvec_new = tvec_new + seconds()-t0
nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
END DO
PRINT*, 'nerrs =', nerrs
!
tscal = tscal/(REAL(nits*np,8))
tscal_new = tscal_new/(REAL(nits*np,8))
tvec = tvec/(REAL(nits*np,8))
tvec_new = tvec_new/(REAL(nits*np,8))
WRITE(*,'(4a12)') 'scalar', 'scalar new', 'vector', 'vector new'
WRITE(*,'(4(1pe12.3))') tscal, tscal_new, tvec, tvec_new
!
! Clean up
!
DEALLOCATE(xp)
DEALLOCATE(left)
DEALLOCATE(xgrid)
CALL destroy_sp(splx)
!
CONTAINS
LOGICAL ELEMENTAL FUNCTION in_interv(x, l)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(in) :: l
in_interv = x.GE.xgrid(l) .AND. x.LT.xgrid(l+1)
END FUNCTION in_interv
END PROGRAM main
diff --git a/examples/tmassmat.f90 b/examples/tmassmat.f90
index 679f965..b284969 100644
--- a/examples/tmassmat.f90
+++ b/examples/tmassmat.f90
@@ -1,189 +1,189 @@
!>
!> @file tmassmat.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! FT of mass matrix
!
USE bsplines
USE matrix
!
IMPLICIT NONE
INTEGER :: nx, nidbas
INTEGER :: ngauss, nrank, kl, ku
INTEGER :: i, k, kmin, kmax
DOUBLE PRECISION :: pi, xlenght, dx, arg0, arg
DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), arow(:)
DOUBLE PRECISION, ALLOCATABLE :: fftmassm1(:), fftmassm2(:), fftmassm3(:)
DOUBLE PRECISION, ALLOCATABLE :: fftmass_shifted(:)
TYPE(spline1d) :: splx
TYPE(periodic_mat) :: massm
!
NAMELIST /newrun/ nx, nidbas, xlenght
!================================================================================
! 1.0 Prologue
!
pi = 4.0d0*ATAN(1.0d0)
!
nx = 8
nidbas = 3
xlenght = 2.0d0*pi
!
READ(*,newrun)
WRITE(*,newrun)
!
ngauss = nidbas+1 ! Exact integration for polynomials of degree 2*nidbas
!
ALLOCATE(xgrid(0:nx))
dx = xlenght/REAL(nx)
xgrid = (/ (i*dx,i=0,nx) /)
WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
!
CALL set_spline(nidbas, ngauss, xgrid, splx, .TRUE.)
!===========================================================================
! 2.0 Mass matrix
nrank = nx
kl = nidbas
ku = kl
CALL init(kl, ku, nrank, 1, massm)
CALL dismat(splx, massm)
!
ALLOCATE(arow(nrank))
!!$ WRITE(*,'(/a)') 'Mass matrix'
!!$ DO i=1,nrank
!!$ CALL getrow(massm, i, arow)
!!$ WRITE(*,'(10(1pe12.4))') arow
!!$ END DO
!===========================================================================
! 3.0 Fourier transform of Mass matrix
!
ALLOCATE(fftmassm1(0:nx-1))
ALLOCATE(fftmassm2(0:nx-1))
ALLOCATE(fftmassm3(0:nx-1))
IF(nidbas.LE.3) THEN
CALL analytic(nidbas, fftmassm1)
fftmassm1 = dx*fftmassm1
WRITE(*,'(/a/(10(1pe12.4)))') 'Analytic', fftmassm1
END IF
!
CALL calc_fftmass_old(splx, fftmassm2)
WRITE(*,'(/a/(10(1pe12.4)))') 'Old version', fftmassm2
IF(nidbas.LE.3) THEN
WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm2-fftmassm1))
END IF
!
! Init DFT
kmin = -nx/2
kmax = nx/2-1
CALL init_dft(splx, kmin, kmax)
!
ALLOCATE(fftmass_shifted(kmin:kmax))
CALL calc_fftmass(splx, fftmass_shifted)
DO k=kmin, kmax
fftmassm3(MODULO(k+nx,nx))=fftmass_shifted(k)
END DO
WRITE(*,'(/a/(10(1pe12.4)))') 'New version', fftmassm3
WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm3-fftmassm2))
!
! Check dftcoefs
WRITE(*,'(/a)') 'Check DFT of splines'
PRINT*, 'dims of dftcoefs', SHAPE(splx%dft%coefs)
DO i=0,nidbas
WRITE(*,'(a,i3,2(1pe12.4))') 'Sum of coefs for spline', i, &
& SUM(splx%dft%coefs(:,i))
END DO
!===========================================================================
! 9.0 Clean up
!
DEALLOCATE(xgrid)
DEALLOCATE(fftmassm1)
DEALLOCATE(fftmassm2)
DEALLOCATE(fftmassm3)
DEALLOCATE(fftmass_shifted)
DEALLOCATE(arow)
CALL destroy(massm)
CALL destroy_sp(splx)
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE dismat(spl, mat)
TYPE(spline1d) :: spl
TYPE(periodic_mat) :: mat
INTEGER :: dim, nx, nidbas, ngauss
DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:)
INTEGER :: i, igauss, iw, jt, irow, jcol
DOUBLE PRECISION :: contrib
!
CALL get_dim(spl, dim, nx, nidbas)
ALLOCATE(fun(0:nidbas,1)) ! Spline
CALL get_gauss(spl, ngauss)
ALLOCATE(xgauss(ngauss), wgauss(ngauss))
!
DO i=1,nx
CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
DO igauss=1,ngauss
CALL basfun(xgauss(igauss), spl, fun, i)
DO jt=0,nidbas
DO iw=0,nidbas
contrib = fun(jt,1) * fun(iw,1) * wgauss(igauss)
irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC
jcol=MODULO(i+jt-1,nx) + 1
CALL updtmat(mat, irow, jcol, contrib)
END DO
END DO
END DO
END DO
!
DEALLOCATE(fun)
DEALLOCATE(xgauss, wgauss)
END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
SUBROUTINE analytic(nidbas, mat)
!
! Analytic form for nidbas .le. 3
!
INTEGER, INTENT(in) :: nidbas
DOUBLE PRECISION, INTENT(out) :: mat(0:)
DOUBLE PRECISION :: arg0, arg, cosk
INTEGER :: n, k
!
n = SIZE(mat)
arg0 = 2.0d0*pi/REAL(n,8)
DO k=0,n-1
arg = k*arg0
cosk = COS(arg)
SELECT CASE (nidbas)
CASE (1)
mat(k) = (2.0d0 + cosk)/3.0d0
CASE (2)
mat(k) = (16.0d0 + cosk*(13.0d0+cosk))/30.0d0
CASE (3)
mat(k) = (272.0d0 + cosk*(297.0d0+cosk*(60.0d0+cosk)))/630.0d0
CASE default
WRITE(*,'(a,i4,a)') 'ANALYTIC: nidbas =', nidbas, ' is not implemented!'
STOP
END SELECT
END DO
END SUBROUTINE analytic
END PROGRAM main
diff --git a/examples/tmatrix_gb.f90 b/examples/tmatrix_gb.f90
index 6183335..106c0b7 100644
--- a/examples/tmatrix_gb.f90
+++ b/examples/tmatrix_gb.f90
@@ -1,114 +1,114 @@
!>
!> @file tmatrix_gb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test some routines of module matrix
!
USE matrix
IMPLICIT NONE
TYPE(gbmat) :: mata
INTEGER, PARAMETER :: n=5, ku=3, kl=3
DOUBLE PRECISION :: arr(n), fulla(n,n), base
DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
INTEGER :: i, j, iaway, pow
CHARACTER(len=32) :: str
!
CALL init(ku, ku, n, 0, mata)
CALL getvalp(mata, p)
PRINT*, 'shape of A: ', SHAPE(p)
!
! Test updtmat
p = 0.0d0
DO j=1,n
DO i=1,n
arr(i) = 10*i + j
IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(i))
END DO
END DO
CALL prntmat('Test of UPDTMAT', p)
!
! Test PUTCOL
p = 0.0d0
DO j=1,n
DO i=1,n
arr(i) = 10*i + j
END DO
CALL putcol(mata, j, arr)
END DO
CALL prntmat('Test of PUTCOL', p)
!
!
! Test PUTROW
p = 0.0d0
DO i=1,n
DO j=1,n
arr(j) = 10*i + j
END DO
CALL putrow(mata, i, arr)
END DO
CALL prntmat('Test of PUTROW', p)
!
! Test GETCOL
fulla = 0.0
DO j=1,n
CALL getcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Test of GETCOL', fulla)
!
iaway=4
arr = 0.0d0
arr(iaway) =1.0
CALL putrow(mata, iaway, arr)
CALL putcol(mata, iaway, arr)
WRITE(str,'(a,i3)') 'Away on i = j =',iaway
CALL prntmat(TRIM(str), p)
!
! Test GETCOL
fulla = 0.0
DO j=1,n
CALL getcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Matrix full', fulla)
!
! Test of determinant
CALL determinant(mata, base, pow)
CALL prntmat('Factored A (gb)', p)
PRINT*, 'Prod. of factored A diagnonals', PRODUCT(p(kl+ku+1,:))
WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow
PRINT*, 'Pivots ', mata%piv
!
call destroy(mata)
CONTAINS
SUBROUTINE prntmat(str, a)
DOUBLE PRECISION, DIMENSION(:,:) :: a
CHARACTER(len=*) :: str
INTEGER :: i
WRITE(*,'(a)') TRIM(str)
DO i=1,SIZE(a,1)
WRITE(*,'(10f8.1)') a(i,:)
END DO
END SUBROUTINE prntmat
END PROGRAM main
diff --git a/examples/tmatrix_pb.f90 b/examples/tmatrix_pb.f90
index eae0fd4..01d7f92 100644
--- a/examples/tmatrix_pb.f90
+++ b/examples/tmatrix_pb.f90
@@ -1,143 +1,143 @@
!>
!> @file tmatrix_pb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test some routines of module matrix
!
USE matrix
IMPLICIT NONE
TYPE(pbmat) :: mata, matb
INTEGER, PARAMETER :: n=5, ku=3
DOUBLE PRECISION :: arr(n), fulla(n,n), fullb(n,n), base
DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p, pb
INTEGER :: i, j, info, pow
!
CALL init(ku, n, 0, mata)
CALL init(1, n, 0, matb)
CALL getvalp(mata, p)
CALL getvalp(matb, pb)
PRINT*, 'shape of A: ', SHAPE(p)
!
! Test updtmat
p = 0.0d0
DO i=1,n
DO j=i,n
arr(j) = 10*i + j
IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j))
END DO
END DO
CALL prntmat('Test of UPDTMAT', p)
!
! Test GETCOL
fulla = 0.0
DO j=1,n
CALL getcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Full matrix from GETCOL', fulla)
!
! Test GETROW
fulla = 0.0
DO i=1,n
CALL getrow(mata, i, fulla(i,:))
END DO
CALL prntmat('Full matrix from GETROW', fulla)
!
! Test PUTCOL
p = 0.0d0
DO j=1,n
DO i=1,n
arr(i) = 10*i + j
END DO
CALL putcol(mata, j, arr)
END DO
CALL prntmat('Test of PUTCOL', p)
!
! Test PUTROW
p = 0.0d0
DO i=1,n
DO j=1,n
arr(j) = 10*i + j
END DO
CALL putrow(mata, i, arr)
END DO
CALL prntmat('Test of PUTROW', p)
!
arr = 0.0d0
arr(2) =1.0
CALL putrow(mata, 2, arr)
CALL prntmat('Away on i=2, j=2', p)
!
! Test GETCOL
fulla = 0.0
DO j=1,n
CALL getcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Full matrix from GETCOL', fulla)
!
! Test GETROW
fulla = 0.0
DO i=1,n
CALL getrow(mata, i, fulla(i,:))
END DO
CALL prntmat('Full matrix from GETROW', fulla)
!
! Test GETELE
fulla = 0.0
DO i=1,n
DO j=1,n
IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j))
END DO
END DO
CALL prntmat('Full matrix from GETELE', fulla)
!
! Test of determinant
fullb = 0.0
DO i=1,n
fullb(i,i) = 2.0d0
IF(i.LT.n) fullb(i,i+1)=-1.0d0
IF(i.GT.1) fullb(i,i-1)=-1.0d0
END DO
DO j=1,n
CALL putcol(matb, j, fullb(:,j))
END DO
CALL prntmat('Mat. A (full)', fullb)
CALL prntmat('Mat. A (pb)', pb)
CALL determinant(matb, base, pow)
WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow
!
CALL destroy(mata)
CALL destroy(matb)
CONTAINS
SUBROUTINE prntmat(str, a)
DOUBLE PRECISION, DIMENSION(:,:) :: a
CHARACTER(len=*) :: str
INTEGER :: i
WRITE(*,'(a)') TRIM(str)
DO i=1,SIZE(a,1)
WRITE(*,'(10f8.1)') a(i,:)
END DO
END SUBROUTINE prntmat
END PROGRAM main
diff --git a/examples/tmatrix_zpb.f90 b/examples/tmatrix_zpb.f90
index ba7c30d..2340d0b 100644
--- a/examples/tmatrix_zpb.f90
+++ b/examples/tmatrix_zpb.f90
@@ -1,137 +1,137 @@
!>
!> @file tmatrix_zpb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test some routines of module matrix
!
USE matrix
IMPLICIT NONE
TYPE(zpbmat) :: mata, matb
INTEGER, PARAMETER :: n=5, ku=3
DOUBLE COMPLEX :: arr(n), fulla(n,n), fullb(n,n), base
DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p, pb
INTEGER :: i, j, pow
!
CALL init(ku, n, 0, mata)
CALL init(1, n, 0, matb)
CALL getvalp(mata, p)
CALL getvalp(matb, pb)
PRINT*, 'shape of A: ', SHAPE(p)
!
! Test updtmat
p = 0.0d0
DO i=1,n
DO j=i,n
arr(j) = CMPLX(10*i + j, j-i)
IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j))
END DO
END DO
CALL prntmat('Test of UPDTMAT', p)
!
! Test GETCOL
fulla = 0.0
DO j=1,n
CALL getcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Full matrix from GETCOL', fulla)
!
! Test GETROW
fulla = 0.0
DO i=1,n
CALL getrow(mata, i, fulla(i,:))
END DO
CALL prntmat('Full matrix from GETROW', fulla)
!
! Test PUTCOL
p = 0.0d0
DO j=1,n
CALL putcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Test of PUTCOL', p)
!
! Test PUTROW
p = 0.0d0
DO i=1,n
CALL putrow(mata, i, fulla(i,:))
END DO
CALL prntmat('Test of PUTROW', p)
!
arr = 0.0d0
arr(2) =1.0
CALL putrow(mata, 2, arr)
CALL prntmat('Away on i=2, j=2', p)
!
! Test GETCOL
fulla = 0.0
DO j=1,n
CALL getcol(mata, j, fulla(:,j))
END DO
CALL prntmat('Full matrix from GETCOL', fulla)
!
! Test GETROW
fulla = 0.0
DO i=1,n
CALL getrow(mata, i, fulla(i,:))
END DO
CALL prntmat('Full matrix from GETROW', fulla)
!
! Test GETELE
fulla = 0.0
DO i=1,n
DO j=1,n
IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j))
END DO
END DO
CALL prntmat('Full matrix from GETELE', fulla)
!
! Test of determinant
fullb = 0.0
DO i=1,n
fullb(i,i) = 2.0d0
IF(i.LT.n) fullb(i,i+1)=-1.0d0
IF(i.GT.1) fullb(i,i-1)=-1.0d0
END DO
DO j=1,n
CALL putcol(matb, j, fullb(:,j))
END DO
CALL prntmat('Mat. A (full)', fullb)
CALL prntmat('Mat. A (pb)', pb)
CALL determinant(matb, base, pow)
WRITE(*,'(a,2f8.5,i3)') 'Determinant(base,power) = ', base, pow
!
CALL destroy(mata)
CALL destroy(matb)
CONTAINS
SUBROUTINE prntmat(str, a)
DOUBLE COMPLEX, DIMENSION(:,:) :: a
CHARACTER(len=*) :: str
INTEGER :: i
WRITE(*,'(a)') TRIM(str)
DO i=1,SIZE(a,1)
WRITE(*,'(5(5x,"(",f5.1,",",f5.1,")"))') a(i,:)
END DO
END SUBROUTINE prntmat
END PROGRAM main
diff --git a/examples/tp2p_mat.f90 b/examples/tp2p_mat.f90
index d5084fb..3a6b318 100644
--- a/examples/tp2p_mat.f90
+++ b/examples/tp2p_mat.f90
@@ -1,108 +1,108 @@
!>
!> @file tp2p_mat.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
USE pardiso_bsplines
IMPLICIT NONE
INCLUDE 'mpif.h'
!
INTEGER :: me, npes, ierr
INTEGER :: next
INTEGER :: i, j, rank
DOUBLE PRECISION :: val
DOUBLE PRECISION, ALLOCATABLE :: arow(:)
TYPE(pardiso_mat) :: mat
!
CALL mpi_init(ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
IF(npes.NE.2) THEN
PRINT*, 'Should run with 2 procs!'
CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
END IF
!
! Define local matrix
!
rank = npes
CALL init(npes, 0, mat)
DO i=1,rank ! Fill row me+1
val = me+1
j = me+1
CALL updtmat(mat, i, me+1, val)
END DO
!
! Exchange matrix
!
CALL disp_mat('Original matrix')
next=MODULO(me+1,2)
!
IF(me.EQ.0) THEN
CALL p2p_mat(mat, 1, 'send', 'put', MPI_COMM_WORLD)
ELSE
CALL p2p_mat(mat, 0, 'recv', 'put', MPI_COMM_WORLD)
END IF
CALL disp_mat('Matrix after 0->1/put')
!
CALL p2p_mat(mat, next, 'sendrecv', 'put', MPI_COMM_WORLD)
CALL disp_mat('Matrix after sendrev/put')
!
CALL p2p_mat(mat, next, 'sendrecv', 'updt', MPI_COMM_WORLD)
CALL disp_mat('Matrix after sendrev/updt')
!
IF(me.EQ.1) THEN
CALL p2p_mat(mat, 0, 'send', 'updt', MPI_COMM_WORLD)
ELSE
CALL p2p_mat(mat, 1, 'recv', 'updt', MPI_COMM_WORLD)
END IF
CALL disp_mat('Matrix after 1->0/updt')
!
IF(me.EQ.1) THEN
CALL p2p_mat(mat, 0, 'send', 'put', MPI_COMM_WORLD)
ELSE
CALL p2p_mat(mat, 1, 'recv', 'put', MPI_COMM_WORLD)
END IF
CALL disp_mat('Matrix after 1->0/put')
!
CALL mpi_finalize(ierr)
CONTAINS
SUBROUTINE disp_mat(str)
CHARACTER(len=*), INTENT(in) :: str
INTEGER :: p
DO p=0,npes-1
IF(me.EQ.p) THEN
WRITE(*,'(a,i3.3)') str//' on PE', me
CALL to_mat(mat, nlkeep=.TRUE.)
ALLOCATE(arow(mat%rank))
DO i=1,mat%rank
CALL getrow(mat, i, arow)
WRITE(*,'(10f8.2)') arow
END DO
DEALLOCATE(arow)
CALL FLUSH(6)
END IF
CALL mpi_barrier(MPI_COMM_WORLD, ierr)
END DO
END SUBROUTINE disp_mat
END PROGRAM main
diff --git a/examples/tpsum_mat.f90 b/examples/tpsum_mat.f90
index 5f01441..30d30f9 100644
--- a/examples/tpsum_mat.f90
+++ b/examples/tpsum_mat.f90
@@ -1,77 +1,77 @@
!>
!> @file tpsum_mat.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!!$ USE pardiso_bsplines
!!$ USE wsmp_bsplines
USE mumps_bsplines
IMPLICIT NONE
INCLUDE 'mpif.h'
!
INTEGER :: me, npes, ierr
INTEGER :: i, j, rank
DOUBLE PRECISION :: val
DOUBLE PRECISION, ALLOCATABLE :: arow(:)
!!$ TYPE(pardiso_mat) :: mat
!!$ TYPE(wsmp_mat) :: mat
TYPE(mumps_mat) :: mat
!
CALL mpi_init(ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
rank = npes
CALL init(npes, 1, mat)
DO i=1,rank ! Fill row me+1
val = me+1
j = me+1
CALL updtmat(mat, i, me+1, val)
END DO
!
!!$ CALL disp_mat('Original matrix')
CALL psum_mat(mat, MPI_COMM_WORLD)
CALL disp_mat('Global sum of matrix')
!
CALL mpi_finalize(ierr)
CONTAINS
SUBROUTINE disp_mat(str)
CHARACTER(len=*), INTENT(in) :: str
INTEGER :: p
DO p=0,npes-1
IF(me.EQ.p) THEN
CALL to_mat(mat, nlkeep=.TRUE.)
WRITE(*,'(a,i3.3,a,2i6)') str//' on PE', me, ': rank, nnz', mat%rank, mat%nnz
ALLOCATE(arow(mat%rank))
DO i=1,mat%rank
CALL getrow(mat, i, arow)
WRITE(*,'(10f8.2)') arow
END DO
DEALLOCATE(arow)
CALL FLUSH(6)
END IF
CALL mpi_barrier(MPI_COMM_WORLD, ierr)
END DO
END SUBROUTINE disp_mat
END PROGRAM main
diff --git a/examples/tsparse1.f90 b/examples/tsparse1.f90
index c22a07f..eb3e44c 100644
--- a/examples/tsparse1.f90
+++ b/examples/tsparse1.f90
@@ -1,117 +1,117 @@
!>
!> @file tsparse1.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MAIN !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
PROGRAM main
USE sparse
IMPLICIT none
!
TYPE(zspmat) :: amat
TYPE(zsprow) :: arow
!
LOGICAL :: found
INTEGER :: n=10
INTEGER :: jcol, nnz, i
DOUBLE PRECISION :: valr, vali
DOUBLE COMPLEX :: val, zero=(0.,0.)
DOUBLE COMPLEX, ALLOCATABLE :: arr(:), farr(:)
INTEGER, ALLOCATABLE :: col(:), newcol(:)
!
! Initialize the sparse matrix amat
!
CALL init(n, amat)
!
! Use UPDT_SP to update a sparse row
!
WRITE(*,*) 'Enter a list of positive indices, terminate with a zero'
DO
READ(*,*) jcol
IF(jcol .LE. 0) EXIT
CALL RANDOM_NUMBER(valr)
vali = jcol
val = CMPLX(valr, vali)
CALL updtmat(arow, jcol, val)
END DO
!
! Convert a sparse row to a sequential row
!
nnz = arow%nnz
WRITE(*,'(a,i5)') 'nnz =', nnz
!
ALLOCATE(arr(nnz), col(nnz), newcol(nnz))
CALL getrow(arow, arr, col)
WRITE(*, '(a/(10i8))') 'col', col
WRITE(*, '(a/(10f8.4))') 'arr', arr
!
ALLOCATE(farr(MAXVAL(col)))
CALL getrow(arow, farr)
WRITE(*, '(/a/(10f8.4))') 'farr', farr
!
! Clear element by element of row
!
DO i=1,nnz
CALL putele(arow, col(i), zero)
CALL getrow(arow, arr, newcol)
WRITE(*, '(/a,i6/(10i8))') 'col', arow%nnz, newcol(1:arow%nnz)
END DO
!
! Re-create row using PUTROW and full row
!
CALL putrow(arow, farr)
CALL getrow(arow, arr, col)
WRITE(*,'(/a,i5)') 'nnz =', arow%nnz
WRITE(*, '(a/(10i8))') 'col', col
WRITE(*, '(a/(10f8.4))') 'arr', arr
!
! Clear row using DESTROY
!
CALL destroy(arow)
CALL getrow(arow, arr, newcol)
nnz = arow%nnz
WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz)
!
! Re-create row using PUTROW and sparse row
!
CALL putrow(arow, arr, col)
CALL getrow(arow, arr, newcol, nnz)
WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz)
WRITE(*, '(a/(10f8.4))') 'arr', arr
!
! Test GETELE
!
i=111;val=0
CALL getele(arow, i, val, found)
WRITE(*,'(/i8,2f8.4,l3)') i, val, found
DO i=1,nnz
CALL getele(arow, col(i), val, found)
WRITE(*,'(i8,2f8.4,l3)') col(i), val, found
END DO
!
! Test destroy_spmat
!
CALL destroy(amat)
END PROGRAM main
diff --git a/examples/tsparse2.f90 b/examples/tsparse2.f90
index 23d3fcf..98efd21 100644
--- a/examples/tsparse2.f90
+++ b/examples/tsparse2.f90
@@ -1,151 +1,151 @@
!>
!> @file tsparse2.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Simple 2D Poisson using 5 points FD
!
USE pardiso_bsplines
IMPLICIT NONE
!
TYPE(pardiso_mat) :: amat, bmat
DOUBLE PRECISION, ALLOCATABLE :: arow(:), rhs(:), sol(:)
DOUBLE PRECISION, ALLOCATABLE :: arown(:,:), rhsn(:,:), soln(:,:)
INTEGER :: nx=5, ny=4, n
INTEGER :: i, j, irow, jcol
!
DOUBLE PRECISION :: mem, seconds
!
n = nx*ny
CALL init(n, 1, amat) ! Non-symmetric matrix
CALL init(n, 1, bmat, nlsym=.TRUE.) ! Symmetric matrix
ALLOCATE(arow(n), arown(n,2))
ALLOCATE(rhs(n), rhsn(n,2))
ALLOCATE(sol(n), soln(n,2))
!
! Construct the FD matrix amat, using sparse rows (linked lists)
!
DO j=1,ny
DO i=1,nx
arow = 0.0d0
irow = numb(i,j)
arow(irow) = 4.0d0
IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0
IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0
IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
rhs(irow) = SUM(arow)
CALL putrow(amat, irow, arow) ! General matrix
CALL putrow(bmat, irow, arow) ! Symmetric matrix
END DO
END DO
!
! Print the matrices
!
WRITE(*,'(/a)') 'Matrix A'
DO i=1,n
CALL getrow(amat, i, arow)
WRITE(*,'(30f4.0)') arow
END DO
PRINT*, 'nnz from get_count', get_count(amat)
!
WRITE(*,'(/a)') 'Matrix B'
DO i=1,n
CALL getrow(bmat, i, arow)
WRITE(*,'(30f4.0)') arow
END DO
PRINT*, 'nnz from get_count', get_count(bmat)
!
! Factor the matrix using Pardiso
!
CALL factor(amat, nlmetis=.TRUE.)
WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of A = ',amat%p%iparm(18)
WRITE(*,'(a,i5)') 'Number of factorization MFLOPS = ',amat%p%iparm(19)
!
CALL factor(bmat, nlmetis=.TRUE.)
WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of B = ',bmat%p%iparm(18)
WRITE(*,'(a,i5)') 'Number of factorization MFLOPS = ',bmat%p%iparm(19)
!
WRITE(*,'(/a/(10f8.4))') 'rhs', rhs
!
! Backsolve Ax = b, using Pardiso
!
sol = rhs
CALL bsolve(amat, sol, debug=.FALSE.)
WRITE(*,'(/a/(10f8.4))') 'sol (non-sym)', sol
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
!
! Backsolve Bx = b, using Pardiso
!
sol = rhs
CALL bsolve(bmat, sol, debug=.FALSE.)
WRITE(*,'(/a/(10f8.4))') 'sol (sym)', sol
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
!
arow = vmx(amat, sol)
WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arow
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs))
!
arow = vmx(bmat, sol)
WRITE(*,'(/a/(10f8.4))') 'B*x (sym)', arow
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs))
!
! Multiple RHS
!
rhsn(:,1) = -rhs(:)
rhsn(:,2) = 2.d0*rhs(:)
CALL bsolve(amat, rhsn, soln)
WRITE(*,'(/a/(10f8.4))') 'soln (non-sym)', soln
WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), &
& MAXVAL(ABS(soln(:,2)-2.0d0))
!
CALL bsolve(bmat, rhsn, soln)
WRITE(*,'(/a/(10f8.4))') 'soln (sym)', soln
WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), &
& MAXVAL(ABS(soln(:,2)-2.0d0))
!
arown = vmx(amat, soln)
WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arown
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn))
!
arown = vmx(bmat, soln)
WRITE(*,'(/a/(10f8.4))') 'A*x (sym)', arown
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn))
!
! Clean up
!
DEALLOCATE(arow,arown)
DEALLOCATE(rhs,rhsn)
DEALLOCATE(sol,soln)
CALL destroy(amat)
CALL destroy(bmat)
CONTAINS
INTEGER FUNCTION numb(i,j)
INTEGER, INTENT(in) :: i, j
numb = (j-1)*nx + i
END FUNCTION numb
!
END PROGRAM main
diff --git a/examples/zpardiso_ex1.f b/examples/zpardiso_ex1.f
index 7979d88..33027c6 100644
--- a/examples/zpardiso_ex1.f
+++ b/examples/zpardiso_ex1.f
@@ -1,96 +1,96 @@
!>
!> @file zpardiso_ex1.f
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
USE wsmp_bsplines
USE pardiso_bsplines
IMPLICIT NONE
c
INTEGER :: n=9
INTEGER ia(10)
INTEGER ja(29)
COMPLEX*16 avals(29)
COMPLEX*16 b(9), sol(9), arow(9)
c
c$$$ type(zwsmp_mat) :: mat
type(zpardiso_mat) :: mat
integer :: i, k
c
DATA ia /1,5,9,13,17,21,25,27,29,30/
data ja
1 /1, 3, 7, 8,
2 2, 3, 8, 9,
3 3, 7, 8, 9,
4 4, 6, 7, 8,
5 5, 6, 8, 9,
6 6, 7, 8, 9,
7 7, 8,
8 8, 9,
9 9/
data avals
1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
7 (16.d0,0.d0), (-4.d0,-2.d-1),
8 (71.d0,0.d0), (-4.d0,-2.d-1),
9 (16.d0,0.d0)/
c
c$$$ call init(n, 1, mat, nlherm=.false., nlsym=.true., nlpos=.true.)
call init(n, 1, mat, nlherm=.true., nlpos=.true.)
do i=1,n
do k=ia(i),ia(i+1)-1
call putele(mat, i, ja(k), avals(k))
end do
end do
c
call factor(mat)
c
print*, 'diff of val', cnorm2(avals-mat%val)
print*, 'diff of ia', ia-mat%irow
print*,' diff ja', ja-mat%cols
c
print*, 'The RHS:'
do i = 1, n
call getrow(mat,i, arow)
b(i) = sum(arow)
print *,i,' : ',b(i)
end do
call bsolve(mat,b,sol)
print *,'The solution of the system is as follows:'
do i = 1, n
print *,i,' : ',sol(i)
end do
print*, 'Residue =', cnorm2(vmx(mat,sol)-b)
contains
FUNCTION cnorm2(x)
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: cnorm2
cnorm2 = SQRT(DOT_PRODUCT(x,x))
END FUNCTION cnorm2
c
END PROGRAM main
diff --git a/examples/zssmp_ex1.f b/examples/zssmp_ex1.f
index cae1c08..dbe84fc 100644
--- a/examples/zssmp_ex1.f
+++ b/examples/zssmp_ex1.f
@@ -1,111 +1,111 @@
!>
!> @file zssmp_ex1.f
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
USE wsmp_bsplines
USE pardiso_bsplines
IMPLICIT NONE
c
INTEGER :: n=9
INTEGER ia(10)
INTEGER ja(29)
COMPLEX*16 avals(29)
COMPLEX*16 b(9), sol(9), arow(9)
c
type(zwsmp_mat) :: mat
integer :: i, k
c
DATA ia /1,5,9,13,17,21,25,27,29,30/
data ja
1 /1, 3, 7, 8,
2 2, 3, 8, 9,
3 3, 7, 8, 9,
4 4, 6, 7, 8,
5 5, 6, 8, 9,
6 6, 7, 8, 9,
7 7, 8,
8 8, 9,
9 9/
data avals
1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
7 (16.d0,0.d0), (-4.d0,-2.d-1),
8 (71.d0,0.d0), (-4.d0,-2.d-1),
9 (16.d0,0.d0)/
c
call init(n, 1, mat, nlherm=.true., nlpos=.true.)
do i=1,n
do k=ia(i),ia(i+1)-1
call putele(mat, i, ja(k), avals(k))
end do
end do
c
print*, 'The RHS before tomat:'
do i = 1, n
call getrow(mat,i, arow)
b(i) = sum(arow)
print *,i,' : ',b(i)
end do
c
call factor(mat)
c
write(*,'(a/(20f6.2))') 'avals', avals
write(*,'(a/(20f6.2))') 'mat%val', mat%val
print*, 'diff of val', cnorm2(avals-mat%val)
print*, 'diff of ia', ia-mat%irow
print*,' diff ja', ja-mat%cols
c
print*, 'Check getrow'
do i = 1, n
call getrow(mat,i, arow)
write(*,'(i3,": ",(20f6.2))') i, arow(i:n)
end do
c
print*, 'The RHS:'
do i = 1, n
call getrow(mat,i, arow)
b(i) = sum(arow)
print *,i,' : ',b(i)
end do
call bsolve(mat,b,sol)
print *,'Norm of Residual = ',mat%p%dparm(7)
print *,'The solution of the system is as follows:'
do i = 1, n
print *,i,' : ',sol(i)
end do
print*, 'Residue =', cnorm2(vmx(mat,sol)-b)
c
contains
FUNCTION cnorm2(x)
DOUBLE COMPLEX, INTENT(in) :: x(:)
DOUBLE PRECISION :: cnorm2
cnorm2 = SQRT(DOT_PRODUCT(x,x))
END FUNCTION cnorm2
c
END PROGRAM main
diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt
index 82f6ec0..1b099c2 100644
--- a/fft/CMakeLists.txt
+++ b/fft/CMakeLists.txt
@@ -1,69 +1,69 @@
#
# @file CMakeLists.txt
#
# @brief Principal CMake configuration file for the fft library
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
# @author Trach-Minh Tran
#
project(fft)
find_package(FFTW REQUIRED)
set(fft_w $ENV{fft_w})
set(SRCS
fft_fftw.F90
)
set(EXAMPLES tfft.f90)
add_library(fft STATIC ${SRCS})
target_include_directories(fft
PRIVATE $
${FFTW_INCLUDES}
INTERFACE $
$
${FFTW_INCLUDES}
)
if (${fft_w} MATCHES "fft_w2")
target_compile_options(fft PRIVATE "-Dfft_w2")
else()
target_compile_options(fft PRIVATE "-Dfft_w3")
endif()
target_link_libraries(fft PUBLIC ${FFTW_LIBRARY} ${MPI_Fortran_LIBRARIES})
#
set_property(TARGET fft
PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/fft.mod)
add_executable(tfft tfft.f90)
target_link_libraries(tfft fft ${MPI_Fortran_LIBRARIES})
#add_test(tfft ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
# ${CMAKE_CURRENT_BINARY_DIR}/tfft < ${fft_SOURCE_DIR}/in)
install(TARGETS fft
EXPORT ${BSPLINES_EXPORT_TARGETS}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
diff --git a/fft/Makefile b/fft/Makefile
index bb7a818..895fdb9 100644
--- a/fft/Makefile
+++ b/fft/Makefile
@@ -1,68 +1,68 @@
#
# @file Makefile
#
# @brief Makefile for the fft library
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
FFTW=$(FFTW_HOME)
F90 = ifort
LD = ifort
debug = -g -traceback -CB
optim = -O3
OPT=$(debug)
#OPT=$(optim)
F90FLAGS = $(OPT) -Dfft_w3 -I$(FFTW)/include
LDFLAGS = $(OPT) -L. -L$(FFTW)/lib
LIBS = -lfft -lfftw3
.SUFFIXES:
.SUFFIXES: .o .c .F90 .f90 .f
.f90.o:
$(F90) $(F90FLAGS) -c $<
.F90.o:
$(F90) $(F90FLAGS) -c $<
.f.o:
$(F90) $(F90FLAGS) -c $<
all: tfft
lib: libfft.a
libfft.a: fft_fftw.o
xiar r $@ $?
ranlib $@
tfft: tfft.o
$(LD) $(LDFLAGS) -o $@ tfft.o fft_fftw.o $(LIBS)
tfft.o: lib
clean:
rm -f *.o *.mod *~ a.out
distclean: clean
rm -f tfft
diff --git a/fft/fft_fftw.F90 b/fft/fft_fftw.F90
index ff9e7ca..cd404a8 100644
--- a/fft/fft_fftw.F90
+++ b/fft/fft_fftw.F90
@@ -1,1376 +1,1376 @@
!>
!> @file fft_fftw.F90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
#if defined(fft_w2)
MODULE fft
!
IMPLICIT NONE
!
PRIVATE
!
PUBLIC four1, fourcol, fourrow
!
! Global parameters
!
INTEGER, PARAMETER :: MXPLAN=8
!
! Global variables
!
INTEGER ::n1d_saved=0
INTEGER*8, DIMENSION(MXPLAN) :: plan1d
INTEGER, DIMENSION(MXPLAN) :: n1d
REAL, DIMENSION(:), ALLOCATABLE :: scr1_real
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: scr1
INTERFACE fourcol
MODULE PROCEDURE four1, fourcol_ra, fourcol_raa
END INTERFACE
CONTAINS
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE four1(arr, isign)
!
! A single 1D complex FFT
!
INCLUDE 'fftw_f77.h'
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local variables
!
INTEGER :: n, id
!
n = SIZE(arr)
IF( .NOT. ALLOCATED(scr1) ) THEN
ALLOCATE(scr1(n))
ELSE
IF ( SIZE(scr1) < n ) THEN
DEALLOCATE(scr1)
ALLOCATE(scr1(n))
END IF
END IF
!
CALL getplan(n, isign, id, 1)
CALL fftw_f77_one(plan1d(id), arr, scr1)
END SUBROUTINE four1
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_ra(arr, isign)
!
! 1D complex FFT of columns of arr(1:N,1:howmany)
!
INCLUDE 'fftw_f77.h'
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local variables
!
INTEGER :: n, howmany, id
!
n = SIZE(arr,1)
howmany = SIZE(arr,2)
!
IF( .NOT. ALLOCATED(scr1) ) THEN
ALLOCATE(scr1(n))
ELSE
IF ( SIZE(scr1) < n ) THEN
DEALLOCATE(scr1)
ALLOCATE(scr1(n))
END IF
END IF
!
CALL getplan(n, isign, id,1)
CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n)
END SUBROUTINE fourcol_ra
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_raa(arr, isign)
!
! 1D complex FFT of columns of arr(1:N,1:howmany)
!
INCLUDE 'fftw_f77.h'
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local variables
!
INTEGER :: n, howmany, id
!
n = SIZE(arr,1)
howmany = SIZE(arr,2)*SIZE(arr,3)
!
IF( .NOT. ALLOCATED(scr1) ) THEN
ALLOCATE(scr1(n))
ELSE
IF ( SIZE(scr1) < n ) THEN
DEALLOCATE(scr1)
ALLOCATE(scr1(n))
END IF
END IF
!
CALL getplan(n, isign, id, 1)
CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n)
END SUBROUTINE fourcol_raa
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourrow(arr, isign)
!
! 1D complex FFT of rows of arr(1:howmany,1:N)
!
INCLUDE 'fftw_f77.h'
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local variables
!
INTEGER :: n, howmany, id
!
n = SIZE(arr,2)
howmany = SIZE(arr,1)
!
IF( .NOT. ALLOCATED(scr1) ) THEN
ALLOCATE(scr1(n))
ELSE
IF ( SIZE(scr1) < n ) THEN
DEALLOCATE(scr1)
ALLOCATE(scr1(n))
END IF
END IF
!
CALL getplan(n, isign, id, 1)
CALL fftw_f77(plan1d(id), howmany, arr, howmany, 1, &
& scr1, howmany, 1)
END SUBROUTINE fourrow
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE getplan(n, sign, id, complex_fftw)
!
! Create or get an already created FFT plan (depends only on N.)
!
INCLUDE 'fftw_f77.h'
!
! Dummy arguments
!
INTEGER, INTENT(IN) :: n ! size of transform
INTEGER, INTENT(IN) :: sign ! dir. of transform -1=>FORWARD, +1=>BACKWARD
INTEGER, INTENT(OUT) :: id ! id of FFT plan
INTEGER, INTENT(IN) :: complex_fftw ! Create complex<->complex transform if =1
!
! Local variables
!
INTEGER :: k, i, dir
!
k = sign*(2*n+complex_fftw)
DO i = 1,n1d_saved
IF( k == n1d(i)) THEN
id = i
RETURN
END IF
END DO
IF( n1d_saved == MXPLAN) THEN
PRINT*, 'Module fft: MXPLAN too small! Increase it and recompile'
STOP
END IF
n1d_saved = n1d_saved+1
n1d(n1d_saved) = k
id = n1d_saved
dir = FFTW_FORWARD
IF( sign == +1 ) dir = FFTW_BACKWARD
IF (complex_fftw == 1) THEN
CALL fftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE)
!!$ ELSE
!!$ CALL rfftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE)
END IF
END SUBROUTINE getplan
END MODULE fft
#endif
!
#if defined(fft_w3)
MODULE fft
!
IMPLICIT NONE
!
PRIVATE
PUBLIC :: four1, fourcol, fourrow
!
INCLUDE 'fftw3.f'
!
TYPE int_para
INTEGER, DIMENSION(2) :: par ! size of transform
END TYPE int_para
!
! Global parameters
!
INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans.
!
! Global variables
!
INTEGER*8, DIMENSION(MXPLAN,4), SAVE :: plan1d ! plans for 1-dim FFT
TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par
INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of plans saved
!
INTERFACE fourcol
MODULE PROCEDURE four1, fourcol_ra, fourcol_raa
END INTERFACE
!
CONTAINS
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE four1(vec, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=1
!
! Local variables
!
INTEGER :: k
INTEGER :: dim1, i, id, istat, n
DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: vec_tmp
!
!
n = SIZE(vec)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*n
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
! test if the maximal number of plans is alredy reached.
!
IF (n1d_saved(NUM) == MXPLAN) THEN
WRITE(*,*) 'FOUR1: MXPLAN too small! Increase it and recompile'
STOP
END IF
!
dim1 = SIZE(vec)
ALLOCATE(vec_tmp(dim1), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: Allocation of vec_tmp failed!'
STOP
END IF
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
id = n1d_saved(NUM)
!
SELECT CASE (isign)
CASE (-1)
CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), &
FFTW_FORWARD, FFTW_ESTIMATE)
CASE (1)
CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), &
FFTW_BACKWARD, FFTW_ESTIMATE)
END SELECT
!
DEALLOCATE(vec_tmp, stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: Dellocation of vec_tmp failed!'
STOP
END IF
!
END SELECT
!
CALL dfftw_execute_dft(plan1d(id,NUM), vec(1), vec(1))
!
END SUBROUTINE four1
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_ra(arr, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=2, RANK=1
!
! Local variables
!
INTEGER :: k
INTEGER :: dim1, dim2, howmany, i, id, istat, n
INTEGER, DIMENSION(RANK) :: n_arr, nembed
DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp
!
dim1 = SIZE(arr,1)
dim2 = SIZE(arr,2)
howmany = SIZE(arr,2)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*dim1
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
! test if the maximal number of plans is alredy reached.
!
IF (n1d_saved(NUM) == MXPLAN) THEN
WRITE(*,*) 'FOURCOL_RA: MXPLAN too small! Increase it and recompile'
STOP
END IF
!
ALLOCATE(arr_tmp(dim1, dim2), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RA: Allocation of arr_tmp failed!'
STOP
END IF
!
nembed(1) = SIZE(arr)
n_arr(1) = dim1
n = n_arr(1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
SELECT CASE (isign)
CASE (-1)
CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, &
FFTW_FORWARD, FFTW_ESTIMATE)
CASE (1)
CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, &
FFTW_BACKWARD, FFTW_ESTIMATE)
END SELECT
!
DEALLOCATE(arr_tmp, stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RA: Dellocation of arr_tmp failed!'
STOP
END IF
!
END SELECT
!
CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1))
!
END SUBROUTINE fourcol_ra
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_raa(arr, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=3, RANK=1
!
! Local variables
!
INTEGER :: k
INTEGER :: dim1, dim2, dim3, howmany, i, id, istat, n
INTEGER, DIMENSION(RANK) :: n_arr, nembed
DOUBLE COMPLEX, DIMENSION(:,:,:), ALLOCATABLE :: arr_tmp
!
dim1 = SIZE(arr,1)
dim2 = SIZE(arr,2)
dim3 = SIZE(arr,3)
howmany = dim2*dim3
!
! test if a plan that fits is already created.
!
id = -1
k = isign*dim1
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
! test if the maximal number of plans is alredy reached.
!
IF (n1d_saved(NUM) == MXPLAN) THEN
WRITE(*,*) 'FOURCOL_RAA: MXPLAN too small! Increase it and recompile'
STOP
END IF
!
ALLOCATE(arr_tmp(dim1, dim2, dim3), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RAA: Allocation of arr_tmp failed!'
STOP
END IF
!
nembed(1) = SIZE(arr)
n_arr(1) = dim1
n = n_arr(1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
SELECT CASE (isign)
CASE (-1)
CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, &
FFTW_FORWARD, FFTW_ESTIMATE)
CASE (1)
CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, &
FFTW_BACKWARD, FFTW_ESTIMATE)
END SELECT
!
DEALLOCATE(arr_tmp, stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RAA: Dellocation of arr_tmp failed!'
STOP
END IF
!
END SELECT
!
CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1,1), arr(1,1,1))
!
END SUBROUTINE fourcol_raa
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourrow(arr, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=4, RANK=1
!
! Local variables
!
INTEGER :: k
INTEGER :: dim1, dim2, howmany, i, id, istat, n
INTEGER, DIMENSION(RANK) :: n_arr, nembed
DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp
!
dim1 = SIZE(arr,1)
dim2 = SIZE(arr,2)
howmany = dim1
!
! test if a plan that fits is already created.
!
id = -1
k = isign*dim2
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
! test if the maximal number of plans is alredy reached.
!
IF (n1d_saved(NUM) == MXPLAN) THEN
WRITE(*,*) 'FOURROW: MXPLAN too small! Increase it and recompile'
STOP
END IF
!
ALLOCATE(arr_tmp(dim1, dim2), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURROW: Allocation of arr_tmp failed!'
STOP
END IF
!
nembed(1) = SIZE(arr)
n_arr(1) = SIZE(arr,2)
n = n_arr(1)
howmany = SIZE(arr,1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
SELECT CASE (isign)
CASE (-1)
CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, &
FFTW_FORWARD, FFTW_ESTIMATE)
CASE (1)
CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, &
FFTW_BACKWARD, FFTW_ESTIMATE)
END SELECT
!
DEALLOCATE(arr_tmp, stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: Dellocation of arr_tmp failed!'
STOP
END IF
!
END SELECT
!
CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1))
!
END SUBROUTINE fourrow
END MODULE fft
#endif
!
#if defined(fft_essl)
MODULE fft
!
IMPLICIT NONE
!
PRIVATE
!
PUBLIC :: four1, fourcol, fourrow
!
TYPE pointer_ra
REAL, DIMENSION(:), POINTER :: poi_ra
END TYPE pointer_ra
!
TYPE int_para
INTEGER, DIMENSION(2) :: par
END TYPE int_para
!
! Global parameters
!
INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of work arrays.
!
! Global variables
!
EXTERNAL :: ENOTRM
LOGICAL, SAVE :: initflag=.TRUE. ! initialization of the module
CHARACTER (len=8), SAVE :: S2015 ! string to copy error list entry
REAL, DIMENSION(8) :: aux1 ! auxilary array
REAL, DIMENSION(1) :: aux2 ! auxilary array
TYPE(pointer_ra), DIMENSION(:,:), ALLOCATABLE, SAVE :: aux1_poi, aux2_poi ! work arrays for the ESSL routine
TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par ! size of transform
INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of plans saved
!
INTERFACE fourcol
MODULE PROCEDURE fourcol_ra, fourcol_raa
END INTERFACE
!
CONTAINS
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE four1(vec, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=1
!
! Local variables
!
INTEGER :: i, id, istat, k, n, naux1, naux2
!
IF (initflag) THEN
initflag = .FALSE.
!
CALL EINFO(0)
CALL ERRSAV(2015,S2015)
!
ALLOCATE(aux1_poi(MXPLAN,4), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: 1. Allocation of aux1_poi failed!'
STOP
END IF
!
ALLOCATE(aux2_poi(MXPLAN,4), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: 1. Allocation of aux2_poi failed!'
STOP
END IF
!
END IF
!
n = SIZE(vec)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*n
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
id = n1d_saved(NUM)
!
CALL ERRSET(2015,0,-1,1,ENOTRM,0)
!
naux1 = SIZE(aux1)
naux2 = SIZE(aux2)
!
CALL dcft(1, vec(1), 1, n, vec(1), 1, n, &
n, 1, -isign, 1.0, aux1, naux1, aux2, naux2)
!
CALL ERRSTR(2015,S2015)
!
! dynamic allocation of the work arrays.
!
ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: 2. Allocation of aux1_poi failed!'
STOP
ENDIF
!
ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOUR1: 2. Allocation of aux2_poi failed!'
STOP
ENDIF
!
CALL dcft(1, vec(1), 1, n, vec(1), 1, n, &
n, 1, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), naux1, &
aux2_poi(id,NUM)%poi_ra(1), naux2)
!
END SELECT
!
CALL dcft(0, vec(1), 1, n, vec(1), 1, n, &
n, 1, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
!
END SUBROUTINE four1
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_ra(arr, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=2
!
! Local variables
!
INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2
!
IF (initflag) THEN
initflag = .FALSE.
!
CALL EINFO(0)
CALL ERRSAV(2015,S2015)
!
ALLOCATE(aux1_poi(MXPLAN,4), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RA: 1. Allocation of aux1_poi failed!'
STOP
END IF
!
ALLOCATE(aux2_poi(MXPLAN,4), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RA: 1. Allocation of aux2_poi failed!'
STOP
END IF
!
END IF
!
dim1 = SIZE(arr,1)
howmany = SIZE(arr,2)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*dim1
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
CALL ERRSET(2015,0,-1,1,ENOTRM,0)
!
naux1 = SIZE(aux1)
naux2 = SIZE(aux2)
!
CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2)
!
CALL ERRSTR(2015,S2015)
!
! dynamic allocation of the work arrays.
!
ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RA: 2. Allocation of aux1_poi failed!'
STOP
ENDIF
!
ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RA: 2. Allocation of aux2_poi failed!'
STOP
ENDIF
!
CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
dim1, howmany, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), naux1, &
aux2_poi(id,NUM)%poi_ra(1), naux2)
!
END SELECT
!
CALL dcft(0, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
dim1, howmany, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
!
END SUBROUTINE fourcol_ra
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_raa(arr, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=3
!
! Local variables
!
INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2
!
IF (initflag) THEN
initflag = .FALSE.
!
CALL EINFO(0)
CALL ERRSAV(2015,S2015)
!
ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of aux1_poi failed!'
STOP
END IF
!
ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of aux2_poi failed!'
STOP
END IF
!
END IF
!
dim1 = SIZE(arr,1)
howmany = SIZE(arr,2)*SIZE(arr,3)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*dim1
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
CALL ERRSET(2015,0,-1,1,ENOTRM,0)
!
naux1 = SIZE(aux1)
naux2 = SIZE(aux2)
!
CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2)
!
CALL ERRSTR(2015,S2015)
!
! dynamic allocation of the work arrays.
!
ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of aux1_poi failed!'
STOP
ENDIF
!
ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of aux2_poi failed!'
STOP
ENDIF
!
CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
dim1, howmany, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), naux1, &
aux2_poi(id,NUM)%poi_ra(1), naux2)
!
END SELECT
!
CALL dcft(0, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
dim1, howmany, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
!
END SUBROUTINE fourcol_raa
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourrow(arr, isign)
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: isign
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=4
!
! Local variables
!
INTEGER :: dim1, dim2, i, id, istat, k, naux1, naux2
!
IF (initflag) THEN
initflag = .FALSE.
!
CALL EINFO(0)
CALL ERRSAV(2015,S2015)
!
ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURROW: 1. Allocation of aux1_poi failed!'
STOP
END IF
!
ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURROW: 1. Allocation of aux2_poi failed!'
STOP
END IF
!
END IF
!
dim1 = SIZE(arr,1)
dim2 = SIZE(arr,2)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*dim1
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
dim2 == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
!
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = dim2
id = n1d_saved(NUM)
!
CALL ERRSET(2015,0,-1,1,ENOTRM,0)
!
naux1 = SIZE(aux1)
naux2 = SIZE(aux2)
!
CALL dcft(1, arr(1,1), dim2, 1, arr(1,1), dim2, 1, &
dim1, dim2, -isign, 1.0, aux1, naux1, aux2, naux2)
!
CALL ERRSTR(2015,S2015)
!
! dynamic allocation of the work arrays.
!
ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURROW: 2. Allocation of aux1_poi failed!'
STOP
ENDIF
!
ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
IF (istat /= 0) THEN
WRITE(*,*) 'FOURROW: 2. Allocation of aux2_poi failed!'
STOP
ENDIF
!
CALL dcft(1, arr(1,1), dim1, 1, arr(1,1), dim1, 1, &
dim1, dim2, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), naux1, &
aux2_poi(id,NUM)%poi_ra(1), naux2)
!
END SELECT
!
CALL dcft(0, arr(1,1), dim1, 1, arr(1,1), dim1, 1, &
dim1, dim2, -isign, 1.0, &
aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
!
END SUBROUTINE fourrow
END MODULE fft
#endif
!
#if defined(fft_mkl)
MODULE fft
!
USE mkl_dfti
!
IMPLICIT NONE
!
PRIVATE
!
PUBLIC :: pointer_r, handle1d
PUBLIC :: four1, fourcol, fourrow
!
TYPE pointer_r
TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle
END TYPE pointer_r
!
TYPE int_para
INTEGER, DIMENSION(2) :: par
END TYPE int_para
!
! Global parameters
!
INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans.
!
! Global variables
!
TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle
TYPE(pointer_r), DIMENSION(MXPLAN,4), SAVE :: handle1d ! descriptor handles for 1-dim FFT
TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par ! size of transform
INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of descriptor handles saved
!
INTERFACE fourcol
MODULE PROCEDURE fourcol_ra, fourcol_raa
END INTERFACE
!
CONTAINS
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE four1(vec, isign)
!
! Dummy arguments
!
INTEGER, INTENT(IN) :: isign
DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=1
!
! Local variables
!
INTEGER :: dim1, id, i, k, status
LOGICAL :: init_flag
!
! test if a plan that fits is already created.
!
id = -1
k = isign*SIZE(vec)
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
init_flag = .TRUE.
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
id = n1d_saved(NUM)
!
CASE default
init_flag = .FALSE.
END SELECT
!
dim1 = SIZE(vec,1)
!
CALL fourcol_mkl(vec(1), dim1, 1, isign, init_flag, id, NUM)
!
END SUBROUTINE four1
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_ra(arr, isign)
!
! Dummy arguments
!
INTEGER, INTENT(IN) :: isign
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=2
!
! Local variables
!
INTEGER :: dim1, howmany, id, i, k, status
LOGICAL :: init_flag
!
dim1 = SIZE(arr,1)
howmany = SIZE(arr,2)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*SIZE(arr)
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
init_flag = .TRUE.
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
CASE default
init_flag = .FALSE.
END SELECT
!
CALL fourcol_mkl(arr(1,1), dim1, howmany, isign, init_flag, id, NUM)
!
END SUBROUTINE fourcol_ra
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_raa(arr, isign)
!
! Dummy arguments
!
INTEGER, INTENT(IN) :: isign
DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=3
!
! Local variables
!
INTEGER :: dim1, howmany, id, i, k, status
LOGICAL :: init_flag
!
dim1 = SIZE(arr,1)
howmany = SIZE(arr,2)*SIZE(arr,3)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*SIZE(arr)
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
init_flag = .TRUE.
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
CASE default
init_flag = .FALSE.
END SELECT
!
CALL fourcol_mkl(arr(1,1,1), dim1, howmany, isign, init_flag, id, NUM)
!
END SUBROUTINE fourcol_raa
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourrow(arr, isign)
!
! Dummy arguments
!
INTEGER, INTENT(IN) :: isign
DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
!
! Local parameters
!
INTEGER, PARAMETER :: NUM=4
!
! Local variables
!
INTEGER :: dim2, howmany, id, i, k, status
LOGICAL :: init_flag
!
howmany = SIZE(arr,1)
dim2 = SIZE(arr,2)
!
! test if a plan that fits is already created.
!
id = -1
k = isign*SIZE(arr)
DO i = 1,n1d_saved(NUM)
IF (k == n1d_par(i,NUM)%par(1) .AND. &
howmany == n1d_par(i,NUM)%par(2)) THEN
id = i
EXIT
END IF
END DO
!
SELECT CASE (id)
CASE (-1)
init_flag = .TRUE.
n1d_saved(NUM) = n1d_saved(NUM)+1
n1d_par(n1d_saved(NUM),NUM)%par(1) = k
n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
id = n1d_saved(NUM)
!
CASE default
init_flag = .FALSE.
END SELECT
!
CALL fourrow_mkl(arr(1,1), howmany, dim2, isign, init_flag, id, NUM)
!
END SUBROUTINE fourrow
END MODULE fft
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourcol_mkl(arr, dim1, howmany, isign, init_flag, id, num)
!
! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu
! compiler from making a copy of array arr when passing
! arguments.
USE fft, ONLY: handle1d
!
USE mkl_dfti
!
IMPLICIT NONE
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: dim1
INTEGER, INTENT(IN) :: howmany
INTEGER, INTENT(IN) :: isign
LOGICAL, INTENT(IN) :: init_flag
INTEGER, INTENT(IN) :: id
INTEGER, INTENT(IN) :: num
!
! Local variables
!
INTEGER :: i, status
!
IF (init_flag) THEN
!
status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, &
DFTI_DOUBLE, DFTI_COMPLEX, 1, dim1)
IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 0:', DftiErrorMessage(status)
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_NUMBER_OF_TRANSFORMS, howmany)
IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 1:', DftiErrorMessage(status)
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_INPUT_DISTANCE, dim1)
IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 2:', DftiErrorMessage(status)
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_OUTPUT_DISTANCE, dim1)
IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 3:', DftiErrorMessage(status)
!
status = DftiCommitDescriptor(handle1d(id,num)%desc_handle)
IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 4:', DftiErrorMessage(status)
!
END IF
!
SELECT CASE (isign)
CASE (-1)
status = DftiComputeForward(handle1d(id,num)%desc_handle, arr)
CASE (1)
status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr)
END SELECT
!
END SUBROUTINE fourcol_mkl
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
SUBROUTINE fourrow_mkl(arr, howmany, dim2, isign, init_flag, id, num)
!
! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu
! compiler from making a copy of array arr when passing
! arguments.
!
USE fft, ONLY: handle1d
!
USE mkl_dfti
!
IMPLICIT NONE
!
! Dummy arguments
!
DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr
INTEGER, INTENT(IN) :: howmany
INTEGER, INTENT(IN) :: dim2
INTEGER, INTENT(IN) :: isign
LOGICAL, INTENT(IN) :: init_flag
INTEGER, INTENT(IN) :: id
INTEGER, INTENT(IN) :: num
!
! Local variables
!
INTEGER :: i, status
INTEGER, DIMENSION(2) :: stride
!
IF (init_flag) THEN
!
stride(1) = 0
stride(2) = howmany
!
status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, &
DFTI_DOUBLE, DFTI_COMPLEX, 1, dim2)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 0:', DftiErrorMessage(status)
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_NUMBER_OF_TRANSFORMS, dim2)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 1:', DftiErrorMessage(status)
!
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_INPUT_DISTANCE, 1)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 2:', DftiErrorMessage(status)
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_INPUT_STRIDES, stride)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 3:', DftiErrorMessage(status)
!
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_OUTPUT_DISTANCE, 1)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 4:', DftiErrorMessage(status)
status = DftiSetValue(handle1d(id,num)%desc_handle, &
DFTI_OUTPUT_STRIDES, stride)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 5:', DftiErrorMessage(status)
!
status = DftiCommitDescriptor(handle1d(id,num)%desc_handle)
IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 6:', DftiErrorMessage(status)
!
END IF
!
SELECT CASE (isign)
CASE (-1)
status = DftiComputeForward(handle1d(id,num)%desc_handle, arr)
CASE (1)
status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr)
END SELECT
!
END SUBROUTINE fourrow_mkl
#endif
!
diff --git a/fft/fftw_f77.h b/fft/fftw_f77.h
index b825f23..a566a02 100644
--- a/fft/fftw_f77.h
+++ b/fft/fftw_f77.h
@@ -1,53 +1,53 @@
!>
!> @file fftw_f77.h
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
! This file contains PARAMETER statements for various constants
! that can be passed to FFTW routines. You should include
! this file in any FORTRAN program that calls the fftw_f77
! routines (either directly or with an #include statement
! if you use the C preprocessor).
integer FFTW_FORWARD,FFTW_BACKWARD
parameter (FFTW_FORWARD=-1,FFTW_BACKWARD=1)
integer FFTW_REAL_TO_COMPLEX,FFTW_COMPLEX_TO_REAL
parameter (FFTW_REAL_TO_COMPLEX=-1,FFTW_COMPLEX_TO_REAL=1)
integer FFTW_ESTIMATE,FFTW_MEASURE
parameter (FFTW_ESTIMATE=0,FFTW_MEASURE=1)
integer FFTW_OUT_OF_PLACE,FFTW_IN_PLACE,FFTW_USE_WISDOM
parameter (FFTW_OUT_OF_PLACE=0)
parameter (FFTW_IN_PLACE=8,FFTW_USE_WISDOM=16)
integer FFTW_THREADSAFE
parameter (FFTW_THREADSAFE=128)
! Constants for the MPI wrappers:
integer FFTW_TRANSPOSED_ORDER, FFTW_NORMAL_ORDER
integer FFTW_SCRAMBLED_INPUT, FFTW_SCRAMBLED_OUTPUT
parameter(FFTW_TRANSPOSED_ORDER=1, FFTW_NORMAL_ORDER=0)
parameter(FFTW_SCRAMBLED_INPUT=8192)
parameter(FFTW_SCRAMBLED_OUTPUT=16384)
diff --git a/fft/tfft.f90 b/fft/tfft.f90
index 866a66b..18da423 100644
--- a/fft/tfft.f90
+++ b/fft/tfft.f90
@@ -1,141 +1,141 @@
!>
!> @file tfft.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test the FFT routines exported by module fft
!
USE fft
IMPLICIT NONE
INTEGER :: nx=8
DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: a,b,c
DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rn1, rn2
DOUBLE PRECISION :: pi, argx
INTEGER :: ix, nx0=2
!
PRINT*, 'Enter array dim. nx'
READ(*,*) nx
ALLOCATE(a(nx,4), b(nx,4), c(4,nx))
ALLOCATE(rn1(nx,4), rn2(nx,4))
!
! Create initial array
pi = 4.0d0*ATAN(1.0d0)
WRITE(*,*) 'Enter mode nx0'
READ(*,*) nx0
DO ix=0,nx-1
argx = 2.0d0*pi/nx*nx0*ix
a(ix+1,:) = COS(argx)
END DO
!
WRITE(*,*) '-----------------------'
WRITE(*,*) 'Reals of original array'
WRITE(*,*) '-----------------------'
WRITE(*,'(10f10.4)') REAL(a)
!________________________________________________________________________________
!
WRITE(*,*) '-----------------'
WRITE(*,*) 'testing four1 ...'
WRITE(*,*) '-----------------'
!
! Forward transform
b=a
CALL four1(b(:,1), -1)
!
! Check
WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
!
! Backward transform
CALL four1(b(:,1), 1)
b = b/REAL(nx)
!
! Check
WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
!
b(:,1) = b(:,1)-a(:,1)
rn1(:,1) = REAL(b(:,1),8)
rn2(:,1) = AIMAG(b(:,1))
PRINT *, 'Min. max err of real', MINVAL(rn1(:,1)), MAXVAL(rn1(:,1))
PRINT *, 'Min. max err of imag', MINVAL(rn2(:,1)), MAXVAL(rn2(:,1))
!________________________________________________________________________________
!
WRITE(*,*) '-------------------'
WRITE(*,*) 'testing fourcol ...'
WRITE(*,*) '-------------------'
!
! Forward transform
b=a
CALL fourcol(b(:,:), -1)
!
! Check
WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
!
! Backward transform
CALL fourcol(b(:,:), 1)
b = b/REAL(nx)
!
! Check
WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
!
b = b-a
rn1 = REAL(b,8)
rn2 = AIMAG(b)
PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1)
PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2)
!________________________________________________________________________________
!
WRITE(*,*) '-------------------'
WRITE(*,*) 'testing fourrow ...'
WRITE(*,*) '-------------------'
!
! Forward transform
b=a
c = TRANSPOSE(b)
CALL fourrow(c, -1)
!
! Check
WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:))
WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:))
!
! Backward transform
CALL fourrow(c, 1)
c = c/REAL(nx)
!
! Check
WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:))
WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:))
!
b = TRANSPOSE(c)-a
rn1 = REAL(b,8)
rn2 = AIMAG(b)
PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1)
PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2)
!
! Clean up
DEALLOCATE(a,b,c, rn1,rn2)
END PROGRAM main
diff --git a/matlab/cds_mat.m b/matlab/cds_mat.m
index e2a79d4..25e1c65 100644
--- a/matlab/cds_mat.m
+++ b/matlab/cds_mat.m
@@ -1,44 +1,44 @@
%
% @file cds_mat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [mata, diag] = cds_mat(file, dset)
n=double(hdf5read(file,dset, 'RANK'));
dists=double(hdf5read(file, strcat(dset,'/dists')));
val=hdf5read(file, strcat(dset,'/vals'));
%% Shift the off-diagonals %%
for k=1:length(dists)
d=dists(k);
if d < 0
val(1:n+d,k) = val(1-d:n,k);
elseif d > 0
val(n:-1:d+1,k) = val(n-d:-1:1,k);
end
end
mata = spdiags(val, dists, n,n);
if nargout == 2
idiag = find(dists==0);
diag = val(:,idiag);
end
diff --git a/matlab/csr_mat.m b/matlab/csr_mat.m
index ddfa679..7dd9ac5 100644
--- a/matlab/csr_mat.m
+++ b/matlab/csr_mat.m
@@ -1,45 +1,45 @@
%
% @file csr_mat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [mata, diag] = csr_mat(file, dset)
n=hdf5read(file,dset, 'RANK');
nnz=hdf5read(file,dset, 'NNZ');
cols=hdf5read(file, strcat(dset,'/cols'));
irow=hdf5read(file, strcat(dset,'/irow'));
val=hdf5read(file, strcat(dset,'/val'));
idiag=hdf5read(file, strcat(dset,'/idiag'));
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
cols=double(cols);
rows=double(rows);
vals = double(val);
mata = sparse(rows,cols,vals);
if nargout == 2
diag = val(idiag);
end
diff --git a/matlab/driv1.m b/matlab/driv1.m
index 2d28ea7..cafb273 100644
--- a/matlab/driv1.m
+++ b/matlab/driv1.m
@@ -1,68 +1,68 @@
%
% @file driv1.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='driv1.h5';
x = hdf5read(file, '/X');
knotsx = hdf5read(file, 'KNOTSX');
splinesx = hdf5read(file, '/splinesx','V71Dimensions', true);
y = hdf5read(file, '/Y');
knotsy = hdf5read(file, 'KNOTSY');
splinesy = hdf5read(file, '/splinesy','V71Dimensions', true);
c=['b','g','r','c','m','y','k'];
nc=size(c,2);
figure
%subplot(211)
hold on
ns = size(splinesx,1);
attr=hdf5read(file,'/splinesx/title'); title_ann=attr.Data;
for i = 1:ns
cc = mod(i-1,nc)+1;
plot(x,splinesx(i,:),c(cc))
end
yk=zeros(size(knotsx));
plot(knotsx,yk,'ro');
grid on
title(title_ann)
ylabel('Splines')
figure
%subplot(212)
hold on
ns = size(splinesy,1);
attr=hdf5read(file,'/splinesy/title'); title_ann=attr.Data;
for i = 1:ns
cc = mod(i-1,nc)+1;
plot(y,splinesy(i,:),c(cc))
end
yk=zeros(size(knotsy));
i1=find(knotsy==y(1));
i2=find(knotsy>=y(size(y,1)),1);
plot(knotsy(i1:i2),yk(i1:i2),'ro');
grid on
title(title_ann)
ylabel('Splines')
diff --git a/matlab/fit.m b/matlab/fit.m
index 64b90e8..7f6674a 100644
--- a/matlab/fit.m
+++ b/matlab/fit.m
@@ -1,38 +1,38 @@
%
% @file fit.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
P1=polyfit(log(n),log(err1),1)
P2=polyfit(log(n),log(err2),1)
P3=polyfit(log(n),log(err3),1)
figure
loglog(n,err1,'o', n, exp(P1(2)).*n.^P1(1), 'b')
hold on
loglog(n,err2,'rh', n, exp(P2(2)).*n.^P2(1),'r')
loglog(n,err3,'*k', n, exp(P3(2)).*n.^P3(1), 'k')
grid on
xlabel('Number of intervals N');
ylabel('Discretization Error')
title('2D Cylindrical problem with m=1, s=10')
diff --git a/matlab/fit1d.m b/matlab/fit1d.m
index 6475e77..f96663d 100644
--- a/matlab/fit1d.m
+++ b/matlab/fit1d.m
@@ -1,94 +1,94 @@
%
% @file fit1d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='fit1d.h5';
x = hdf5read(file, '/X');
f = hdf5read(file, '/FCALC');
fexact = hdf5read(file, '/FEXACT');
error = hdf5read(file, '/ERROR');
f1 = hdf5read(file, '/FCALC1');
fexact1 = hdf5read(file, '/FEXACT1');
error1 = hdf5read(file, '/ERROR1');
splines = hdf5read(file, '/SPLINES'); splines = splines';
%
% Attributes
%
nidbas = hdf5read(file,'/NIDBAS');
nx = hdf5read(file,'/NX');
attr=hdf5read(file,'/FEXACT/title'); fexact_ann=attr.Data;
attr=hdf5read(file,'/FCALC/title'); f_ann=attr.Data;
attr=hdf5read(file,'/ERROR/title'); error_ann=attr.Data;
attr=hdf5read(file,'/FEXACT1/title'); fexact1_ann=attr.Data;
attr=hdf5read(file,'/FCALC1/title'); f1_ann=attr.Data;
attr=hdf5read(file,'/ERROR1/title'); error1_ann=attr.Data;
label=sprintf('Splines of degree %d, NX =%d', nidbas, nx);
ns = size(splines,1);
c=['b','g','r','c','m','y','k'];
nc=size(c,2);
figure
subplot(511)
hold on
for i = 1:ns
cc = mod(i-1,nc)+1;
plot(x,splines(i,:),c(cc))
end
grid on
ylabel('Splines')
xlabel('X')
title(label);
hold off
subplot(512)
plot(x, f, 'o', x, fexact)
legend(f_ann, fexact_ann)
xlabel('X')
grid on
subplot(513)
plot(x, error)
ylabel(error_ann)
xlabel('X')
grid on
subplot(514)
plot(x, f1, 'h', x, fexact1)
legend(f1_ann, fexact1_ann)
xlabel('X')
grid on
subplot(515)
plot(x, error1)
ylabel(error1_ann)
xlabel('X')
grid on
diff --git a/matlab/fit2d.m b/matlab/fit2d.m
index f225459..3d69116 100644
--- a/matlab/fit2d.m
+++ b/matlab/fit2d.m
@@ -1,80 +1,80 @@
%
% @file fit2d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='fit2d.h5';
%
% Get data from data sets
%
r=hdf5read(file,'/xpt');
t=hdf5read(file,'/ypt');
fcalc=hdf5read(file,'/fcalc');
fexact=hdf5read(file,'/fexact');
errs=hdf5read(file,'/errs');
%
% Attributes
%
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
MBESS=hdf5read(file,'/MBESS');
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
NIDBAS1, NIDBAS2, MBESS);
attr=hdf5read(file,'/xpt/title'); x_ann=attr.Data;
attr=hdf5read(file,'/ypt/title'); y_ann=attr.Data;
attr=hdf5read(file,'/fcalc/title'); fcalc_ann=attr.Data;
attr=hdf5read(file,'/fexact/title'); fexact_ann=attr.Data;
attr=hdf5read(file,'/errs/title');errs_ann=attr.Data;
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
figure
subplot(221)
pcolor(double(x),double(y),double(fcalc));
shading interp
xlabel('X'); ylabel('Y')
title(LABEL)
colorbar
subplot(222)
pcolor(double(x),double(y),double(fexact))
shading interp
axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(double(x),double(y),double(errs))
xlabel('X'); ylabel('Y');
title(errs_ann)
%% Plot error at theta ~ pi/4
k = max(find(t.
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [yfit, P] = fitlog(x,y)
P=polyfit(log(x),log(y),1);
yfit = exp(P(2)).*x.^P(1);
diff --git a/matlab/fourier_gs.m b/matlab/fourier_gs.m
index 09bbb05..d8e94bb 100644
--- a/matlab/fourier_gs.m
+++ b/matlab/fourier_gs.m
@@ -1,69 +1,69 @@
%
% @file fourier_gs.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
clear all
tau=0;
alpha=0.5;
theta=-pi:0.02:pi;
[x,y] = meshgrid(theta,theta);
n1=length(theta);
n2=n1;
%%%%
%%%% Gauss-Seidel relaxations
%%%%
str_title=sprintf('tau = %.1f, alpha = %.2f',tau, alpha);
ee = exp(i.*theta);
eep= conj(ee);
csin= alpha.*complex(alpha, (tau/2).*imag(ee));
G=zeros(n1,n2);
for ii=1:n1
for jj=1:n2
num = ee(ii) + csin(ii)*ee(jj);
G(ii,jj) = num / (2*(1+alpha^2) - conj(num));
end
end
figure
hold off
G0=(ee+csin)./(2*(1+alpha^2)-(eep+conj(csin)));
plot(theta, abs(G(:,1)), 'r', 'LineWidth', 2)
hold on
plot(theta, abs(G0), 'g', 'LineWidth', 2)
for jj=1:20:n2
plot(theta, abs(G(:,jj)), 'b')
end
xlabel('\theta_1'); ylabel('Amplification Factor for Gauss-Seidel')
title(str_title)
% $$$ figure
% $$$ mesh(x,y,abs(G))
% $$$ xlabel('\theta_1'); ylabel('\theta_2')
% $$$ title(str_title);
% $$$ view(-120,25)
max(max(abs(G)))
diff --git a/matlab/fourier_jac.m b/matlab/fourier_jac.m
index 4e9da71..ae518ab 100644
--- a/matlab/fourier_jac.m
+++ b/matlab/fourier_jac.m
@@ -1,69 +1,69 @@
%
% @file fourier_jac.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
clear all
omega=0.8;
tau=-2;
alpha=1;
c=2*omega/(1+alpha^2);
str_title=sprintf('omega = %.1f, tau = %.1f, alpha = %.2f', omega, ...
tau, alpha)
theta1=-pi:0.01:pi;
theta2=-pi:0.01:pi;
[x,y] = meshgrid(theta1,theta2);
n1=length(theta1);
n2=length(theta2);
%%%%
%%%% Damped Jacobi relaxations
%%%%
G=zeros(n1,n2);
for ii=1:n1
for jj=1:n2
G(ii,jj) = 1-c.*( sin(theta1(ii)/2)^2 + alpha^2*sin(theta2(jj)/2)^2 ...
+ 0.25*alpha*tau*sin(theta1(ii))*sin(theta2(jj)) );
end
end
figure
hold off
G0 = 1-c.*sin(theta1./2).^2;
plot(theta1, G(:,1), 'r', 'LineWidth', 2)
hold on
plot(theta1, G0, 'g', 'LineWidth', 2)
for jj=1:20:n2
plot(theta1, G(:,jj), 'b')
end
xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi')
title(str_title)
% $$$ figure
% $$$ mesh(x,y,G)
% $$$ xlabel('\theta_1'); ylabel('\theta_2')
% $$$ title(str_title);
max(max(abs(G)))
diff --git a/matlab/fourier_smooth.m b/matlab/fourier_smooth.m
index 45e8a7d..ed3ddd8 100644
--- a/matlab/fourier_smooth.m
+++ b/matlab/fourier_smooth.m
@@ -1,106 +1,106 @@
%
% @file fourier_smooth.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
% Brute force computation of \mu = MAX |G| for
% damped Jacobi and Gauss-Seidel relaxation
%
% Find optimal damping factor \omega for Jacobi relaxations
%
if ~exist('tau','var'), tau=0; end
if ~exist('alpha','var'), alpha=1.0; end
dth=0.01;
theta1=-pi+dth:dth:pi;
theta2=-pi+dth:dth:pi;
nth1=length(theta1);
nth2=length(theta2);
sint12=sin(theta1./2).^2;
sint22=sin(theta2./2).^2;
sint1=sin(theta1);
sint2=sin(theta2);
[S1,S2]=meshgrid(sint1,sint2);
ctau=0.25*alpha*tau;
omega=0.5:0.002:1;
n=length(omega);
G = zeros(nth1,nth2);
for i=1:n
c=2*omega(i)/(1+alpha^2);
for i1=1:nth1
for i2=1:nth2
if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2);
G(i1,i2) = abs(1-c*(sint12(i1)+alpha^2*sint22(i2) + ...
ctau*sint1(i1)*sint2(i2)));
end
end
end
[gmax,imax]=max(G);
[mu(i),jmax]=max(gmax);
theta1_opt(i) = theta1(imax(jmax));
theta2_opt(i) = theta2(jmax);
end
[mu_min,i_min]=min(mu);
omega_opt=omega(i_min);
str_title=sprintf(['omega = %.3f, mu = %.3f, alpha = %.2f, tau = ' ...
'%.1f'], omega_opt, mu_min, alpha, tau);
figure
subplot(211)
plot(omega,mu,'LineWidth',2);
xlabel('\omega'); ylabel('\mu')
grid on
title(str_title);
subplot(212)
plot(omega, theta1_opt, omega, theta2_opt,'LineWidth',2);
legend('\theta_{1opt}', '\theta_{2opt}')
xlabel('\omega'); ylabel('optimum \theta')
grid on
%
% \mu for Gauss-Seidel relaxation
%
Ggs = zeros(nth1,nth2);
exp1=complex(cos(theta1),sin(theta1));
exp2=complex(cos(theta2),sin(theta2));
c=2*(1+alpha^2);
ctau=complex(alpha^2, (0.5*alpha*tau).*sint1);
for i1=1:nth1
for i2=1:nth2
if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2);
Num = exp1(i1) + ctau(i1)*exp2(i2);
Ggs(i1,i2) = abs( Num/(c-conj(Num)) );
end
end
end
[gmax,imax]=max(Ggs);
[mugs,jmax]=max(gmax);
theta1_gs_opt = theta1(imax(jmax));
theta2_gs_opt = theta2(jmax);
fprintf('alpha = %.2f, tau = %.1f, Ggs = %.4f, theta1 = %.4f, theta2 = %.4f\n', alpha, tau, ...
mugs, theta1_gs_opt, theta2_gs_opt);
subplot(211)
hold on
plot(omega,repmat(mugs,1,length(omega)),'r--','LineWidth',2)
diff --git a/matlab/gb_mat.m b/matlab/gb_mat.m
index a15b31d..64440af 100644
--- a/matlab/gb_mat.m
+++ b/matlab/gb_mat.m
@@ -1,43 +1,43 @@
%
% @file gb_mat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [mata] = gb_mat(file, dset)
rank=h5readatt(file, dset, 'RANK');
ku=h5readatt(file, dset, 'KU');
kl=h5readatt(file, dset, 'KL');
gbmat=h5read(file, dset);
m=rank; n=rank;
mata = zeros(m,n);
for i=1:m
jmin = max(1,i-kl);
jmax = min(n,i+ku);
for j=jmin:jmax
ib = kl+ku+i-j+1;
mata(i,j)=gbmat(ib,j);
end
end
clear gbmat;
\ No newline at end of file
diff --git a/matlab/gbmat.m b/matlab/gbmat.m
index 3bb7c5e..dc14e74 100644
--- a/matlab/gbmat.m
+++ b/matlab/gbmat.m
@@ -1,60 +1,60 @@
%
% @file gbmat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde2d.h5';
%
% Attributes of GB matrix
%
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
%mat='/MAT1';
rank=hdf5read(file,strcat(mat,'/RANK'));
ku=hdf5read(file,strcat(mat,'/KU'));
kl=hdf5read(file,strcat(mat,'/KL'));
gb_mat=hdf5read(file,mat);
rhs0=hdf5read(file,'/RHS');
sol0=hdf5read(file,'/SOL');
%
m=rank; n=rank;
a = zeros(m,n);
for i=1:m
jmin = max(1,i-kl);
jmax = min(n,i+ku);
for j=jmin:jmax
ib = kl+ku+i-j+1;
a(i,j)=gb_mat(ib,j);
end
end
%clear gb_mat;
S = sparse(a);
%clear a;
figure
spy(S);
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), rank = %d', NR, NTH, ...
NIDBAS1, NIDBAS2, rank);
title(LABEL)
diff --git a/matlab/gs_fd.m b/matlab/gs_fd.m
index 8e8c1b6..e812c93 100644
--- a/matlab/gs_fd.m
+++ b/matlab/gs_fd.m
@@ -1,66 +1,66 @@
%
% @file gs_fd.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
clear all
N=1024;
L=100;
n0=1;
Narr=[128 256 512 1024];
for i=1:length(Narr);
N=Narr(i);
h=L/N; h2=h^2;
title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0);
v0=-2.+n0*h2;
u0=1.;
dom(i)= 2*abs(u0)/abs(v0);
v=v0.*ones(N,1);
u=u0*ones(N-1,1);
mata=diag(u,1) + diag(u,-1) + diag(v);
matl= tril(mata,0); % D+L
lambda = eig(-triu(mata,1),matl);
rho(i)=max(abs(lambda));
% $$$ figure
% $$$ plot(lambda,'o')
% $$$ xlabel('Real eigenvalue'), ylabel('Imag eigenvalue')
% $$$ grid on
% $$$ axis equal
% $$$ title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ...
% $$$ n0, rho(i),dom(i));
% $$$ title(title_str);
fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ...
rho(i),dom(i))
end
figure
plot(Narr, rho,'o-')
xlabel('N'); ylabel('GS spectral radius')
title_str=sprintf(['FD scheme, L=%.1f, n0=%.1f'], L, n0);
title(title_str);
grid on
diff --git a/matlab/gs_fe.m b/matlab/gs_fe.m
index 7dfaa33..9f77cec 100644
--- a/matlab/gs_fe.m
+++ b/matlab/gs_fe.m
@@ -1,66 +1,66 @@
%
% @file gs_fe.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
clear all
N=1024;
L=100;
n0=1;
Narr=[128 256 512 1024];
for i=1:length(Narr);
N=Narr(i);
h=L/N; h2=h^2;
title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0);
v0=-2.+2.0*n0*h2/6.0;
u0=1.+n0*h2/6.0;
dom(i)= 2*abs(u0)/abs(v0);
v=v0.*ones(N,1);
u=u0*ones(N-1,1);
mata=diag(u,1) + diag(u,-1) + diag(v);
matl= tril(mata,0); % D+L
lambda = eig(-triu(mata,1),matl);
rho(i)=max(abs(lambda));
% $$$ figure
% $$$ plot(lambda,'o')
% $$$ xlabel('Real eigenvalue'), ylabel('Imag eigenvalue')
% $$$ grid on
% $$$ axis equal
% $$$ title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ...
% $$$ n0, rho(i),dom(i));
% $$$ title(title_str);
fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ...
rho(i),dom(i))
end
figure
plot(Narr, rho,'o-')
xlabel('N'); ylabel('GS spectral radius')
title_str=sprintf(['FE scheme, L=%.1f, n0=%.1f'], L, n0);
title(title_str);
grid on
diff --git a/matlab/h5Complex.m b/matlab/h5Complex.m
index ab338f9..c2f8774 100644
--- a/matlab/h5Complex.m
+++ b/matlab/h5Complex.m
@@ -1,50 +1,50 @@
%
% @file h5Complex.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function z = h5Complex(file, dset)
data = hdf5read(file, dset);
dims = size(data);
rank = size(dims,2);
switch rank
case {1}
for i=1:dims(1)
z(i)=complex(cell2mat(data(i,1).Data(1)), cell2mat(data(i,1).Data(2)));
end
case {2}
for i=1:dims(1)
for j=1:dims(2)
z(i,j)=complex(cell2mat(data(i,j).Data(1)), cell2mat(data(i,j).Data(2)));
end
end
case {3}
for i=1:dims(1)
for j=1:dims(2)
for k=1:dims(3)
z(i,j,k)=complex(cell2mat(data(i,j,k).Data(1)), cell2mat(data(i,j,k).Data(2)));
end
end
end
end
diff --git a/matlab/h5Complex_ll.m b/matlab/h5Complex_ll.m
index ad43007..de9bf9d 100644
--- a/matlab/h5Complex_ll.m
+++ b/matlab/h5Complex_ll.m
@@ -1,33 +1,33 @@
%
% @file h5Complex_ll.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function z = h5Complex_ll(file, dset)
fid=H5F.open(file, 'H5F_ACC_RDONLY', 'H5P_DEFAULT');
dset_id=H5D.open(fid, dset);
dxpl = 'H5P_DEFAULT';
data = H5D.read(dset_id,'H5ML_DEFAULT','H5S_ALL','H5S_ALL', dxpl);
z = complex(data.real, data.imaginary);
H5D.close(dset_id);
H5F.close(fid);
diff --git a/matlab/jac_opt.m b/matlab/jac_opt.m
index 82d6f70..04b2c09 100644
--- a/matlab/jac_opt.m
+++ b/matlab/jac_opt.m
@@ -1,78 +1,78 @@
%
% @file jac_opt.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
%
% Find optimal damping factor \omega for Jacobi relaxations
%
clear all
alpha=0.5;
theta2=pi/2:0.01:pi;
theta1=0:0.01:pi;
[x,y]=meshgrid(theta1,theta1);
sint12=sin(theta1./2).^2;
sint22=sin(theta2./2).^2;
[ksi,eta]=meshgrid(sint12,sint22);
nth1=length(theta1);
nth2=length(theta2);
omega=0.1:0.01:1;
n=length(omega);
for i=1:n
c=2*omega(i)/(1+alpha^2);
G=abs(1 - c.*(ksi+(alpha^2).*eta ));
[gmax,imax]=max(G);
[mu(i),jmax]=max(gmax);
eta_opt(i)=eta(imax(jmax),jmax);
ksi_opt(i)=ksi(imax(jmax),jmax);
end
[mu_min,i_min]=min(mu);
omega_opt=omega(i_min);
str_title=sprintf('omega = %.3f, mu = %.3f, alpha = %.2f', omega_opt, ...
mu_min, alpha);
figure
plot(omega,mu,'o-', omega, ksi_opt, '*-', omega, eta_opt, '^-');
legend('\mu', '\xi_{opt}', '\eta_{opt}')
xlabel('\omega'); ylabel('\mu')
grid on
title(str_title);
% $$$
% $$$ c=2*omega_opt/(1+alpha^2);
% $$$ G=1-c.*(ksi+(alpha^2).*eta);
% $$$ figure
% $$$ hold off
% $$$ plot(theta1, G(1,:), 'r', 'LineWidth', 2)
% $$$ hold on
% $$$ plot(theta1, G(nth2,:), 'g', 'LineWidth', 2)
% $$$ for jj=1:20:nth2
% $$$ plot(theta1, G(jj,:), 'b')
% $$$ end
% $$$ xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi')
% $$$ title(str_title)
% $$$ grid on
diff --git a/matlab/modes.m b/matlab/modes.m
index ccb5c06..0dd4521 100644
--- a/matlab/modes.m
+++ b/matlab/modes.m
@@ -1,84 +1,84 @@
%
% @file modes.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
N=8;
kmodes=N-1;
%% modes on fine grid %%
xh=(1/N).*(0:1:N);
wh=zeros(N+1,kmodes);
for k=1:kmodes
wh(:,k) = sin((k*pi).*xh);
end
%% Restriction %%
R=[
1 0 0 0 0 0 0 0 0
0 0.5 1 0.5 0 0 0 0 0
0 0 0 0.5 1 0.5 0 0 0
0 0 0 0 0 0.5 1 0.5 0
0 0 0 0 0 0 0 0 1
];
%% Null space of Restriction %%
ns = [
0 0 0 0
2 0 0 0
-1 -1 0 0
0 2 0 0
0 -1 -1 0
0 0 2 0
0 0 -1 -1
0 0 0 2
0 0 0 0];
figure
subplot(211)
plot(ns,'o-');
title('Basis of Null space of Restriction')
subplot(212)
plot(R','o-');
title('Basis of Range of Prolongation')
%% modes on coarse grid %%
N2h = N/2;
x2h=(1/N2h).*(0:1:N2h);
w2h = R*wh;
x=0:0.01:1.;
figure
for k=1:kmodes
subplot(3,3,k)
plot(xh,wh(:,k),'o', x, sin((k*pi).*x),'b-', x2h, w2h(:,k), 'ro-');
grid on
end
figure
for k=1:N/2
subplot(2,2,k)
plot(xh,wh(:,k),'o-', xh,wh(:,N-k),'r*-')
grid on
end
diff --git a/matlab/pde1d.m b/matlab/pde1d.m
index 86b6642..3f67aa7 100644
--- a/matlab/pde1d.m
+++ b/matlab/pde1d.m
@@ -1,72 +1,72 @@
%
% @file pde1d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1d.h5';
fprintf(1,'NX = %d\n', hdf5read(file,'/NX'));
fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS'));
fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS'));
fprintf(1,'KDIFF = %d\n', hdf5read(file,'/KDIFF'));
x = hdf5read(file, '/XGRID');
f= hdf5read(file, '/SOLCAL');
fexact= hdf5read(file, '/SOLANA');
err=hdf5read(file, '/ERR');
f=f';
fexact=fexact';
err=err';
figure
subplot(311)
plot(x,f(1,:),'o',x,fexact(1,:))
xlabel('X')
ylabel('Function')
grid on
subplot(312)
plot(x,f(2,:),'o',x,fexact(2,:))
xlabel('X')
ylabel('1st Derivative')
grid on
subplot(313)
plot(x,f(3,:),'o',x,fexact(3,:))
xlabel('X')
ylabel('2nd Derivative')
grid on
figure
subplot(311)
plot(x,err(1,:),'o-')
xlabel('X');
ylabel('Errors on function');
grid on
subplot(312)
plot(x,err(2,:),'o-')
xlabel('X');
ylabel('Errors on 1st derivative');
grid on
subplot(313)
plot(x,err(3,:),'o-')
xlabel('X');
ylabel('Errors on 2nd derivative');
grid on
diff --git a/matlab/pde1d_eig.m b/matlab/pde1d_eig.m
index 8b39b43..70b5434 100644
--- a/matlab/pde1d_eig.m
+++ b/matlab/pde1d_eig.m
@@ -1,30 +1,30 @@
%
% @file pde1d_eig.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1d_eig.h5';
%%% get sparse matrix and its diagonal elelments in dig %%%
[mata,diag]=zcsr_mat(file,'/MAT');
spy(mata,12);
diff --git a/matlab/pde1d_eig_zcsr.m b/matlab/pde1d_eig_zcsr.m
index c6fa3e7..5cbe4d1 100644
--- a/matlab/pde1d_eig_zcsr.m
+++ b/matlab/pde1d_eig_zcsr.m
@@ -1,30 +1,30 @@
%
% @file pde1d_eig_zcsr.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1d_eig.h5';
%%% get sparse matrix and its diagonal elelments in dig %%%
[mata,diag]=zcsr_mat(file,'/MAT');
spy(mata,12);
diff --git a/matlab/pde1d_eig_zmumps.m b/matlab/pde1d_eig_zmumps.m
index 2a431bd..b9c721c 100644
--- a/matlab/pde1d_eig_zmumps.m
+++ b/matlab/pde1d_eig_zmumps.m
@@ -1,55 +1,55 @@
%
% @file pde1d_eig_zmumps.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1d_eig_zmumps.h5';
%%% Read mumps matrix and convert to Matlab sparse format
[mata,diag_ele]=zmumps_mat(file,'/MAT');
n=size(mata,1);
%spy(mata,12);
arpack_vals=h5Complex_ll(file,'/eig_vals');
nev=size(arpack_vals,1);
arpack_vecs = h5Complex_ll(file, '/eig_vecs');
%%% Compute eigen values and vectors, using EIGS
[V,D,FLAG]=eigs(mata,nev,'SM');
[eigs_vals,perm]=sort(diag(D));
eigs_vecs=V(:,perm);
fprintf('Eigenvalues from Arpack and Matlab eigs\n');
for i=1:nev
fprintf('%i (%.5e %.5e), %.5e\n',i,real(arpack_vals(i)),imag(arpack_vals(i)),eigs_vals(i));
end
fprintf('Norm of difference %.3e\n', norm(arpack_vals-eigs_vals,Inf));
%%% Renormalize EIGS %%%
fprintf('\n\nDiff of Eigenvectors from Arpack and Matlab eigs\n');
for i=1:nev
nrm=arpack_vecs(1,i);
eigs_vecs(:,i) = (eigs_vecs(:,i)./eigs_vecs(1,i)).*nrm;
diff_vecs = norm(arpack_vecs(:,i)-eigs_vecs(:,i),Inf);
fprintf('%i %10.3e\n', i, diff_vecs);
end
diff --git a/matlab/pde1dp.m b/matlab/pde1dp.m
index 97b7a6e..e464d62 100644
--- a/matlab/pde1dp.m
+++ b/matlab/pde1dp.m
@@ -1,41 +1,41 @@
%
% @file pde1dp.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1dp.h5';
fprintf(1,'NX = %d\n', hdf5read(file,'/NX'));
fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS'));
fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS'));
mata=hdf5read(file,'/mata');
xpts = hdf5read(file, '/rhs/x');
frhs = hdf5read(file, '/rhs/f');
figure
plot(xpts,frhs);
xlabel('X')
ylabel('RHS')
grid on
diff --git a/matlab/pde1dp_cmpl.m b/matlab/pde1dp_cmpl.m
index e07fc31..53f7348 100644
--- a/matlab/pde1dp_cmpl.m
+++ b/matlab/pde1dp_cmpl.m
@@ -1,49 +1,49 @@
%
% @file pde1dp_cmpl.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1dp_cmpl.h5';
xgrid=hdf5read(file,'/xgrid');
nx = size(xgrid)-1;
rhs=h5Complex(file, '/rhs');
sol=h5Complex(file, '/sol');
%mat=h5Complex(file, '/mat');
x=hdf5read(file,'/x');
solana=h5Complex(file,'/solana');
solcal=h5Complex(file,'/solcal');
err=hdf5read(file, '/err');
figure
subplot(211)
plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
imag(solcal), '*')
legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
xlabel('X'); ylabel('SOL');
subplot(212)
plot(x, err, 'o-');
xlabel('X'); ylabel('|Error|')
grid on
diff --git a/matlab/pde1dp_cmpl_dft.m b/matlab/pde1dp_cmpl_dft.m
index 10aa61a..72c8b9d 100644
--- a/matlab/pde1dp_cmpl_dft.m
+++ b/matlab/pde1dp_cmpl_dft.m
@@ -1,70 +1,70 @@
%
% @file pde1dp_cmpl_dft.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1dp_cmpl_dft.h5';
xgrid=hdf5read(file,'/xgrid');
nx = size(xgrid)-1;
mode=0:nx-1;
rhs=h5Complex(file, '/rhs');
sol=h5Complex(file, '/sol');
rhs_fft=h5Complex(file, '/rhs_fft');
sol_fft=h5Complex(file, '/sol_fft');
mat=h5Complex(file, '/mat');
x=hdf5read(file,'/x');
solana=h5Complex(file,'/solana');
solcal=h5Complex(file,'/solcal');
err=hdf5read(file, '/err');
figure
subplot(211)
plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
imag(solcal), '*')
legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
xlabel('X'); ylabel('SOL');
subplot(212)
plot(x, err, 'o-');
xlabel('X'); ylabel('|Error|')
grid on
figure
subplot(311)
plot(mode, real(mat), 'o', mode, imag(mat), '*')
xlabel('mode'); ylabel('DFT of MAT')
legend('Real', 'Imag')
grid on
subplot(312)
plot(mode, real(rhs_fft), 'o', mode, imag(rhs_fft), '*')
xlabel('mode'); ylabel('DFT of RHS')
legend('Real', 'Imag')
grid on
subplot(313)
plot(mode, real(sol_fft), 'o', mode, imag(sol_fft), '*')
xlabel('mode'); ylabel('DFT of SOL')
legend('Real', 'Imag')
grid on
diff --git a/matlab/pde1dp_cmpl_pardiso.m b/matlab/pde1dp_cmpl_pardiso.m
index 1878e8e..bcc75b6 100644
--- a/matlab/pde1dp_cmpl_pardiso.m
+++ b/matlab/pde1dp_cmpl_pardiso.m
@@ -1,72 +1,72 @@
%
% @file pde1dp_cmpl_pardiso.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde1dp_cmpl_pardiso.h5';
xgrid=hdf5read(file,'/xgrid');
nx = size(xgrid)-1;
rhs=h5Complex(file, '/rhs');
sol=h5Complex(file, '/sol');
cols=hdf5read(file, '/MAT/cols');
irow=hdf5read(file, '/MAT/irow');
val=h5Complex(file, '/MAT/val');
perm=hdf5read(file, '/MAT/perm');
cols=double(cols);
irow=double(irow);
perm=double(perm);
n = size(perm,1);
nnz=size(val,1);
rows = zeros(nnz,1);
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
mat = sparse(rows,cols,val);
figure
subplot(121);
spy(mat);
subplot(122);
spy(mat(perm,perm));
x=hdf5read(file,'/x');
solana=h5Complex(file,'/solana');
solcal=h5Complex(file,'/solcal');
err=hdf5read(file, '/err');
figure
subplot(211)
plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
imag(solcal), '*')
legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
xlabel('X'); ylabel('SOL');
subplot(212)
plot(x, err, 'o-');
xlabel('X'); ylabel('|Error|')
grid on
diff --git a/matlab/pde2d.m b/matlab/pde2d.m
index c375cef..e3ee9fe 100644
--- a/matlab/pde2d.m
+++ b/matlab/pde2d.m
@@ -1,85 +1,85 @@
%
% @file pde2d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde2d.h5';
%
% Get data from data sets
%
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
solr=hdf5read(file,'/derivx')';
solt=hdf5read(file,'/derivy')';
%
% Attributes
%
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
MBESS=hdf5read(file,'/MBESS');
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
NIDBAS1, NIDBAS2, MBESS);
attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
solx = cos(T).*solr - sin(T)./R.*solt;
soly = sin(T).*solr + cos(T)./R.*solt;
figure
subplot(221)
pcolor(double(r),double(t),double(sol));
shading interp
hold on, quiver(r,t,solr,solt)
xlabel(x_ann); ylabel(y_ann)
title(LABEL)
colorbar
subplot(222)
pcolor(double(x),double(y),double(sol))
shading interp
hold on, quiver(x,y,solx,soly)
hold off, axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(double(x),double(y),double(sol))
xlabel('X'); ylabel('Y');
title(sol_ann)
subplot(224)
surfc(double(x),double(y),double(err))
xlabel('X'); ylabel('Y');
title(err_ann)
diff --git a/matlab/pde2d_mumps.m b/matlab/pde2d_mumps.m
index 268f2ed..62950d8 100644
--- a/matlab/pde2d_mumps.m
+++ b/matlab/pde2d_mumps.m
@@ -1,97 +1,97 @@
%
% @file pde2d_mumps.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde2d_mumps.h5';
nr=hdf5read(file,'/', 'NX');
nth=hdf5read(file,'/', 'NY');
NIDBAS1=hdf5read(file,'/','NIDBAS1');
NIDBAS2=hdf5read(file,'/','NIDBAS2');
MBESS=hdf5read(file,'/','MBESS');
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
solr=hdf5read(file,'/derivx')';
solt=hdf5read(file,'/derivy')';
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', nr, nth, ...
NIDBAS1, NIDBAS2, MBESS);
figure
subplot(211)
plot(r, sol(nth/2,:), 'o', r, solexact(nth/2,:))
xlabel('r')
grid on
title(LABEL)
subplot(212)
if MBESS == 0
plot(t, sol(:,1), 'o', t, solexact(:,1))
else
plot(t, sol(:,nr/2), 'o', t, solexact(:,nr/2))
end
xlabel('\theta')
grid on
% $$$ if verLessThan('matlab', '7.9');
% $$$ n = hdf5read(file,'/MAT/RANK');
% $$$ nnz = hdf5read(file,'/MAT/NNZ');
% $$$ nlsym = hdf5read(file,'/MAT/NLSYM');
% $$$ else
% $$$ n = hdf5read(file,'/MAT/', 'RANK');
% $$$ nnz = hdf5read(file,'/MAT/', 'NNZ');
% $$$ nlsym = hdf5read(file,'/MAT/', 'NLSYM');
% $$$ end
% $$$
% $$$ cols=hdf5read(file, '/MAT/cols');
% $$$ irow=hdf5read(file, '/MAT/irow');
% $$$ val=hdf5read(file, '/MAT/val');
% $$$ perm=hdf5read(file, '/MAT/perm');
% $$$
% $$$ rows = zeros(nnz,1);
% $$$ cols=double(cols);
% $$$ irow=double(irow);
% $$$ perm=double(perm);
% $$$
% $$$ for i=1:n
% $$$ s = irow(i);
% $$$ e = irow(i+1)-1;
% $$$ rows(s:e) = i;
% $$$ end
% $$$
% $$$ mat = sparse(rows,cols,val);
% $$$ figure
% $$$ subplot(121);
% $$$ spy(mat(perm,perm));
% $$$ title('Matrix structure')
% $$$ subplot(122);
% $$$ spy(chol(mat(perm,perm)));
% $$$ title('Factor L^T')
diff --git a/matlab/pde2d_nh.m b/matlab/pde2d_nh.m
index cd12593..b60a4fe 100644
--- a/matlab/pde2d_nh.m
+++ b/matlab/pde2d_nh.m
@@ -1,101 +1,101 @@
%
% @file pde2d_nh.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde2d_nh.h5';
%
% Get data from data sets
%
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
errx=hdf5read(file,'/errors_x')';
erry=hdf5read(file,'/errors_y')';
solr=hdf5read(file,'/derivx')';
solt=hdf5read(file,'/derivy')';
%
% Attributes
%
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
MBESS=hdf5read(file,'/MBESS');
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
NIDBAS1, NIDBAS2, MBESS);
attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
solx = cos(T).*solr - sin(T)./R.*solt;
soly = sin(T).*solr + cos(T)./R.*solt;
figure
subplot(221)
pcolor(double(r),double(t),double(sol));
shading interp
hold on, quiver(r,t,solr,solt)
xlabel(x_ann); ylabel(y_ann)
title(LABEL)
colorbar
subplot(222)
pcolor(double(x),double(y),double(sol))
shading interp
hold on, quiver(x,y,solx,soly)
hold off, axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(double(x),double(y),double(sol))
xlabel('X'); ylabel('Y');
title(sol_ann)
subplot(224)
surfc(double(x),double(y),double(err))
xlabel('X'); ylabel('Y');
title(err_ann)
figure
subplot(311)
plot(t,err(:,NR+1),'o-')
xlabel('\theta'); ylabel('Error on solution')
grid on
title('Error on Boundary r=1');
subplot(312)
plot(t,errx(:,NR+1),'o-')
xlabel('\theta'); ylabel('Error on x-derivative')
grid on
subplot(313)
plot(t,erry(:,NR+1),'o-')
xlabel('\theta'); ylabel('Error on y-derivative')
grid on
diff --git a/matlab/pde2d_pardiso.m b/matlab/pde2d_pardiso.m
index 1ec3ae3..5b4604c 100644
--- a/matlab/pde2d_pardiso.m
+++ b/matlab/pde2d_pardiso.m
@@ -1,63 +1,63 @@
%
% @file pde2d_pardiso.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde2d_pardiso.h5';
if verLessThan('matlab', '7.9');
n = hdf5read(file,'/MAT/RANK');
nnz = hdf5read(file,'/MAT/NNZ');
nlsym = hdf5read(file,'/MAT/NLSYM');
else
n = hdf5read(file,'/MAT/', 'RANK');
nnz = hdf5read(file,'/MAT/', 'NNZ');
nlsym = hdf5read(file,'/MAT/', 'NLSYM');
end
cols=hdf5read(file, '/MAT/cols');
irow=hdf5read(file, '/MAT/irow');
val=hdf5read(file, '/MAT/val');
perm=hdf5read(file, '/MAT/perm');
rows = zeros(nnz,1);
cols=double(cols);
irow=double(irow);
perm=double(perm);
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
mat = sparse(rows,cols,val);
figure
subplot(121);
spy(mat(perm,perm));
title('Matrix structure')
subplot(122);
spy(chol(mat(perm,perm)));
title('Factor L^T')
diff --git a/matlab/pde2d_sym_pardiso.m b/matlab/pde2d_sym_pardiso.m
index 7eb2c8f..b23f0a7 100644
--- a/matlab/pde2d_sym_pardiso.m
+++ b/matlab/pde2d_sym_pardiso.m
@@ -1,136 +1,136 @@
%
% @file pde2d_sym_pardiso.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde2d_sym_pardiso.h5';
%
% Get data from data sets
%
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
solr=hdf5read(file,'/derivx')';
solt=hdf5read(file,'/derivy')';
%
% Attributes
%
if verLessThan('matlab', '7.9');
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
MBESS=hdf5read(file,'/MBESS');
EPSI=hdf5read(file,'/EPSI');
attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
else
NR=hdf5read(file,'/','NX'); NTH=hdf5read(file,'/','NY');
NIDBAS1=hdf5read(file,'/','NIDBAS1');
NIDBAS2=hdf5read(file,'/','NIDBAS2');
MBESS=hdf5read(file,'/','MBESS');
EPSI=hdf5read(file,'/','EPSI');
attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data;
attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data;
attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data;
attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data;
attr=hdf5read(file,'/errors/','title');err_ann=attr.Data;
end
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', ...
NR, NTH, NIDBAS1, NIDBAS2, MBESS, EPSI);
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
solx = cos(T).*solr - sin(T)./R.*solt;
soly = sin(T).*solr + cos(T)./R.*solt;
figure
subplot(221)
pcolor(double(r),double(t),double(sol));
shading interp
hold on, quiver(r,t,solr,solt)
xlabel(x_ann); ylabel(y_ann)
title(LABEL)
colorbar
subplot(222)
pcolor(double(x),double(y),double(sol))
shading interp
hold on, quiver(x,y,solx,soly)
hold off, axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(double(x),double(y),double(sol))
xlabel('X'); ylabel('Y');
title(sol_ann)
subplot(224)
surfc(double(x),double(y),double(err))
xlabel('X'); ylabel('Y');
title(err_ann)
if verLessThan('matlab', '7.9');
n = hdf5read(file,'/MAT/RANK');
nnz = hdf5read(file,'/MAT/NNZ');
nlsym = hdf5read(file,'/MAT/NLSYM');
else
n = hdf5read(file,'/MAT/', 'RANK');
nnz = hdf5read(file,'/MAT/', 'NNZ');
nlsym = hdf5read(file,'/MAT/', 'NLSYM');
end
cols=hdf5read(file, '/MAT/cols');
irow=hdf5read(file, '/MAT/irow');
val=hdf5read(file, '/MAT/val');
perm=hdf5read(file, '/MAT/perm');
rows = zeros(nnz,1);
cols=double(cols);
irow=double(irow);
perm=double(perm);
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
mat = sparse(rows,cols,val);
figure
subplot(121)
spy(mat)
title('Original Matrix structure')
subplot(122)
spy(mat(perm,perm))
title('Permuted Matrix structure')
diff --git a/matlab/pde2d_sym_pardiso_dft.m b/matlab/pde2d_sym_pardiso_dft.m
index 5b0f948..b5103b7 100644
--- a/matlab/pde2d_sym_pardiso_dft.m
+++ b/matlab/pde2d_sym_pardiso_dft.m
@@ -1,171 +1,171 @@
%
% @file pde2d_sym_pardiso_dft.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
mat_disp=1;
file='pde2d_sym_pardiso_dft.h5';
if verLessThan('matlab', '7.9');
n = hdf5read(file,'/MAT1/RANK');
nnz = hdf5read(file,'/MAT1/NNZ');
nlsym = hdf5read(file,'/MAT1/NLSYM');
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
MBESS=hdf5read(file,'/MBESS');
EPSI=hdf5read(file,'/EPSI');
KMIN=hdf5read(file,'/KMIN');
KMAX=hdf5read(file,'/KMAX');
else
n = hdf5read(file,'/MAT1/', 'RANK');
nnz = hdf5read(file,'/MAT1/', 'NNZ');
nlsym = hdf5read(file,'/MAT1/', 'NLSYM');
NR=hdf5read(file,'/', 'NX'); NTH=hdf5read(file,'/', 'NY');
NIDBAS1=hdf5read(file,'/', 'NIDBAS1');
NIDBAS2=hdf5read(file,'/', 'NIDBAS2');
MBESS=hdf5read(file,'/', 'MBESS');
EPSI=hdf5read(file,'/','EPSI');
KMIN=hdf5read(file,'/','KMIN');
KMAX=hdf5read(file,'/','KMAX');
end
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', NR, NTH, ...
NIDBAS1, NIDBAS2, MBESS, EPSI);
DK = KMAX-KMIN+1;
attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data;
attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data;
attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data;
attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data;
attr=hdf5read(file,'/errors/','title');err_ann=attr.Data;
if mat_disp == 1
cols=hdf5read(file, '/MAT1/cols');
irow=hdf5read(file, '/MAT1/irow');
val=h5Complex(file, '/MAT1/val');
perm=hdf5read(file, '/MAT1/perm');
rows = zeros(nnz,1);
cols=double(cols);
irow=double(irow);
perm=double(perm);
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
valr=real(val); vali=imag(val);
mat = sparse(rows,cols,valr);
figure
subplot(121)
spy(mat,8)
title('Original Matrix structure')
subplot(122)
spy(mat(perm,perm),8)
title('Permuted Matrix structure')
end
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
solr=hdf5read(file,'/derivx')';
solt=hdf5read(file,'/derivy')';
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
solx = cos(T).*solr - sin(T)./R.*solt;
soly = sin(T).*solr + cos(T)./R.*solt;
figure
subplot(221)
pcolor(r,t,sol);
shading interp
hold on, quiver(r,t,solr,solt)
xlabel(x_ann); ylabel(y_ann)
title(LABEL)
colorbar
subplot(222)
pcolor(x,y,sol)
shading interp
hold on, quiver(x,y,solx,soly)
hold off, axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(x,y,sol)
xlabel('X'); ylabel('Y');
title(sol_ann)
subplot(224)
surfc(x,y,err)
xlabel('X'); ylabel('Y');
title(err_ann)
figure
ft_sol=h5Complex(file,'/FT_SOL');
ft_sol=reshape(ft_sol,DK,[]);
m=[KMIN:KMAX]; sp=1:NR+NIDBAS1;
subplot(121)
stem3(sp,m, real(ft_sol), 'filled')
shading interp
xlabel('Radial spline number'); ylabel('m')
title('Real(\phi)')
subplot(122)
stem3(sp,m, imag(ft_sol),'filled')
shading interp
xlabel('Radial spline number'); ylabel('m')
title('Imag(\phi)')
figure
ft_rhs=h5Complex(file,'/FT_RHS');
ft_rhs=reshape(ft_rhs,DK,[]);
m=[KMIN:KMAX]; sp=1:NR+NIDBAS1;
subplot(121)
stem3(sp,m, real(ft_rhs), 'filled')
shading interp
xlabel('Radial spline number'); ylabel('m')
title('Real(\rho)')
subplot(122)
stem3(sp,m, imag(ft_rhs),'filled')
shading interp
xlabel('Radial spline number'); ylabel('m')
title('Imag(\rho)')
figure
energy_k = h5Complex(file,'/ENERGY_K');
subplot(211)
stem(m, real(energy_k));
xlabel('m'); ylabel('Real(\phi)'); title('Spectral energy')
subplot(212)
stem(m, imag(energy_k));
xlabel('m'); ylabel('Imag(\phi)'); title('Spectral energy')
diff --git a/matlab/pde3d.m b/matlab/pde3d.m
index f8cb948..17bb7d5 100644
--- a/matlab/pde3d.m
+++ b/matlab/pde3d.m
@@ -1,124 +1,124 @@
%
% @file pde3d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='pde3d.h5';
%
% Get data from data sets
%
x=hdf5read(file,'/xgrid');
y=hdf5read(file,'/ygrid');
z=hdf5read(file,'/zgrid');
%
nx=size(x);
ny=size(y);
nz=size(z);
%
rhs=hdf5read(file,'/RHS');
coefs=hdf5read(file,'/SOL');
bcoef=hdf5read(file,'/BCOEF');
sol=hdf5read(file,'/sol');
solexact=hdf5read(file,'/solana');
solx=hdf5read(file,'/derivx');
soly=hdf5read(file,'/derivy');
solz=hdf5read(file,'/derivz');
solx_exact=hdf5read(file,'/derivx_exact');
soly_exact=hdf5read(file,'/derivy_exact');
solz_exact=hdf5read(file,'/derivz_exact');
figure
k=ceil(nz(1)/2);
subplot(211);
pcolor(x,y,sol(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated solution')
colorbar
subplot(212);
pcolor(x,y,solexact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical solution')
colorbar
figure
err=sol-solexact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
subplot(211);
pcolor(x,y,solx(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dx')
colorbar
subplot(212);
pcolor(x,y,solx_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dx')
colorbar
figure
k=ceil(nz(1)/2);
subplot(211);
pcolor(x,y,soly(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dy')
colorbar
subplot(212);
pcolor(x,y,soly_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dy')
colorbar
figure
k=ceil(nz(1)/6);
subplot(211);
pcolor(x,y,solz(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dz')
colorbar
subplot(212);
pcolor(x,y,solz_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dz')
colorbar
figure
i=ceil(nx(1)/2);
plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
xlabel('z');
diff --git a/matlab/poisson_fe.m b/matlab/poisson_fe.m
index 92012f6..a12b08b 100644
--- a/matlab/poisson_fe.m
+++ b/matlab/poisson_fe.m
@@ -1,123 +1,123 @@
%
% @file poisson_fe.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='poisson_fe.h5';
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
relax=h5readatt(file,'/','RELAX');
nlevels=h5readatt(file,'/','LEVELS');
nu1=h5readatt(file,'/','NU1');
nu2=h5readatt(file,'/','NU2');
mu=h5readatt(file,'/','MU');
title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ...
nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky);
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=h5read(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=h5read(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
% $$$ figure
% $$$ spy(mata)
%
% Solutions at the finest grid
%
x=h5read(file,'/solutions/xg');
y=h5read(file,'/solutions/yg');
dense=h5read(file,'/solutions/dense');
sol_anal=h5read(file,'/solutions/anal');
sol_calc=h5read(file,'/solutions/calc');
sol_direct=h5read(file,'/solutions/direct');
nx=int32(size(x,1));
ny=int32(size(y,1));
figure
surf(x,y,sol_direct'-sol_anal')
xlabel('X'); ylabel('Y');
title('Error on the finest grid')
figure
subplot(211)
[yy,iy] = max(abs(sol_anal),[],2);
[xx,ix] = max(yy);
iy0=iy(ix);
str=sprintf('Solution at y = %.4f', y(iy0));
plot(x, sol_anal(:,iy0),x, sol_direct(:,iy0),'o')
xlabel('x'); ylabel(str);
grid on
legend('Analytic Solution', 'Direct Solution')
title(title_str)
subplot(212)
[xx,ix] = max(abs(sol_anal));
[yy,iy] = max(xx);
ix0=ix(iy);
str=sprintf('Solution at x = %.4f', x(ix0));
plot(y, sol_anal(ix0,:),y, sol_direct(ix0,:),'o')
xlabel('y'); ylabel(str);
grid on
title(title_str)
%
% Iterations
%
dset='/Iterations/';
disc_err=h5read(file, strcat(dset,'disc_errors'));
resid=h5read(file, strcat(dset,'residues'));
its=0:1:size(resid,1)-1;
figure
subplot(211)
semilogy(its,resid,'o-')
grid on
xlabel('Iterations'); ylabel('Norm of residue');
title(title_str);
subplot(212)
semilogy(its,disc_err,'h-')
grid on
xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/poisson_mg.m b/matlab/poisson_mg.m
index e407dd6..10f23a3 100644
--- a/matlab/poisson_mg.m
+++ b/matlab/poisson_mg.m
@@ -1,140 +1,140 @@
%
% @file poisson_mg.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='poisson_mg.h5'; end
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
lx=h5readatt(file,'/','LX');
ly=h5readatt(file,'/','LY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
beta=h5readatt(file,'/','BETA');
omega=h5readatt(file,'/','OMEGA');
relax=h5readatt(file,'/','RELAX');
mat_type=h5readatt(file,'/','MAT_TYPE');
nlevels=h5readatt(file,'/','LEVELS');
mu=h5readatt(file,'/','MU');
nnu=h5readatt(file,'/','NNU');
nu1=h5read(file,'/nu1');
nu2=h5read(file,'/nu2');
title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, beta=%.4f, relax=%s, V(%d,%d), ' ...
'LEVELS=%d, KX=%d, KY=%d'], ...
nx,ny,lx,ly,beta,relax,nu1(nnu),nu2(nnu),nlevels,kx,ky);
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
if mat_type == 'csr'
[mata,diag]=csr_mat(file,dset);
else
[mata,diag]=cds_mat(file,dset);
end
n=size(diag,1);
% $$$ figure
% $$$ spy(mata)
%
% Spectral radius of GS Iteration Matrix
% Rg = -(D+L)^(-1) * U
%
% $$$ matl= tril(mata,0); % D+L
% $$$ lambda = eigs(-triu(mata,1),matl)
% $$$ fprintf(1, 'Spectral Radius of GS relaxation matrix = %g\n', max(abs(lambda)))
% $$$ figure
% $$$ plot(lambda, 'o')
% $$$ axis equal
% $$$ grid on
% $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
% $$$ title(title_str)
%
% Solutions at the finest grid
%
dense=h5read(file,'/dense');
x=h5read(file,'/solutions/xg');
y=h5read(file,'/solutions/yg');
sol_anal=h5read(file,'/solutions/anal');
%sol_direct=h5read(file,'/solutions/direct');
sol_calc=h5read(file,'/solutions/calc');
figure
% $$$ surf(x,y,sol_calc'-sol_anal')
pcolor(x,y,sol_calc'-sol_anal')
shading interp
colorbar
xlabel('X'); ylabel('Y'); zlabel('Error');
title(title_str)
figure
subplot(211)
[yy,iy] = max(abs(sol_anal),[],2);
[xx,ix] = max(yy);
iy0=iy(ix);
str=sprintf('Solution at y = %.4f', y(iy0));
plot(x, sol_anal(:,iy0),x, sol_calc(:,iy0),'o')
xlabel('x'); ylabel(str);
grid on
legend('Analytic Solution', 'MG Solution')
title(title_str)
subplot(212)
[xx,ix] = max(abs(sol_anal));
[yy,iy] = max(xx);
ix0=ix(iy);
str=sprintf('Solution at x = %.4f', x(ix0));
plot(y, sol_anal(ix0,:),y, sol_calc(ix0,:),'o')
xlabel('y'); ylabel(str);
grid on
title(title_str)
%
% Iterations
%
dset='/Iterations/';
disc_err=h5read(file, strcat(dset,'disc_errors'));
resid=h5read(file, strcat(dset,'residues'));
its=0:1:size(resid,1)-1;
figure
subplot(211)
semilogy(its,resid,'o-')
grid on
xlabel('Iterations'); ylabel('Norm of residue');
title(title_str);
subplot(212)
semilogy(its,disc_err,'h-')
grid on
xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/ppde3d.m b/matlab/ppde3d.m
index ed333f2..de73cc6 100644
--- a/matlab/ppde3d.m
+++ b/matlab/ppde3d.m
@@ -1,145 +1,145 @@
%
% @file ppde3d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='ppde3d.h5';
%
% Get data from data sets
%
x=hdf5read(file,'/xgrid');
y=hdf5read(file,'/ygrid');
z=hdf5read(file,'/zgrid');
%
nx=size(x);
ny=size(y);
nz=size(z);
%
sol=hdf5read(file,'/sol');
solexact=hdf5read(file,'/solana');
solx=hdf5read(file,'/derivx');
soly=hdf5read(file,'/derivy');
solz=hdf5read(file,'/derivz');
solx_exact=hdf5read(file,'/derivx_exact');
soly_exact=hdf5read(file,'/derivy_exact');
solz_exact=hdf5read(file,'/derivz_exact');
figure
k=ceil(nz(1)/2);
subplot(311);
pcolor(x,y,sol(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated solution')
colorbar
subplot(312);
pcolor(x,y,solexact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical solution')
colorbar
subplot(313);
err=sol-solexact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
subplot(311);
pcolor(x,y,solx(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dx')
colorbar
subplot(312);
pcolor(x,y,solx_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dx')
colorbar
subplot(313);
err=solx-solx_exact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
k=ceil(nz(1)/2);
subplot(311);
pcolor(x,y,soly(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dy')
colorbar
subplot(312);
pcolor(x,y,soly_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dy')
colorbar
subplot(313);
err=soly-soly_exact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
k=ceil(nz(1)/6);
subplot(311);
pcolor(x,y,solz(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dz')
colorbar
subplot(312);
pcolor(x,y,solz_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dz')
colorbar
subplot(313);
err=solz-solz_exact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
i=ceil(nx(1)/2);
plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
xlabel('z');
diff --git a/matlab/ppde3d_pb.m b/matlab/ppde3d_pb.m
index 0702db8..8541704 100644
--- a/matlab/ppde3d_pb.m
+++ b/matlab/ppde3d_pb.m
@@ -1,145 +1,145 @@
%
% @file ppde3d_pb.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='ppde3d_pb.h5';
%
% Get data from data sets
%
x=hdf5read(file,'/xgrid');
y=hdf5read(file,'/ygrid');
z=hdf5read(file,'/zgrid');
%
nx=size(x);
ny=size(y);
nz=size(z);
%
sol=hdf5read(file,'/sol');
solexact=hdf5read(file,'/solana');
solx=hdf5read(file,'/derivx');
soly=hdf5read(file,'/derivy');
solz=hdf5read(file,'/derivz');
solx_exact=hdf5read(file,'/derivx_exact');
soly_exact=hdf5read(file,'/derivy_exact');
solz_exact=hdf5read(file,'/derivz_exact');
figure
k=ceil(nz(1)/2);
subplot(311);
pcolor(x,y,sol(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated solution')
colorbar
subplot(312);
pcolor(x,y,solexact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical solution')
colorbar
subplot(313);
err=sol-solexact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
subplot(311);
pcolor(x,y,solx(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dx')
colorbar
subplot(312);
pcolor(x,y,solx_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dx')
colorbar
subplot(313);
err=solx-solx_exact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
k=ceil(nz(1)/2);
subplot(311);
pcolor(x,y,soly(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dy')
colorbar
subplot(312);
pcolor(x,y,soly_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dy')
colorbar
subplot(313);
err=soly-soly_exact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
k=ceil(nz(1)/6);
subplot(311);
pcolor(x,y,solz(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Caculated d/dz')
colorbar
subplot(312);
pcolor(x,y,solz_exact(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Analytical d/dz')
colorbar
subplot(313);
err=solz-solz_exact;
pcolor(x,y,err(:,:,k)');
shading interp
xlabel('x'); ylabel('y')
title('Discretization error')
colorbar
figure
i=ceil(nx(1)/2);
plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
xlabel('z');
diff --git a/matlab/ppoisson_fd.m b/matlab/ppoisson_fd.m
index a573749..e098384 100644
--- a/matlab/ppoisson_fd.m
+++ b/matlab/ppoisson_fd.m
@@ -1,110 +1,110 @@
%
% @file ppoisson_fd.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='ppoisson_fd.h5'; end
prb=h5readatt(file,'/','PRB');
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
lx=h5readatt(file,'/','LX');
ly=h5readatt(file,'/','LY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
beta=h5readatt(file,'/','BETA');
omega=h5readatt(file,'/','OMEGA');
relax=h5readatt(file,'/','RELAX');
nlevels=h5readatt(file,'/','LEVELS');
mu=h5readatt(file,'/','MU');
nu1=h5readatt(file,'/','NU1');
nu2=h5readatt(file,'/','NU2');
direct_solve_nits=h5readatt(file,'/','DIRECT_SOLVE_NITS');
title_str=sprintf(['PRB=%s, N=(%d,%d), relax=%s, V(%d,%d), LEVELS=%d, DIRECT SOLVE=%d'], ...
prb, nx, ny, relax, nu1, nu2, nlevels, direct_solve_nits);
x = h5read(file, '/xgrid');
y = h5read(file, '/ygrid');
[X,Y]=meshgrid(y,x);
n1=size(x,1);
n2=size(y,1);
n=n1*n2
mat = stencil_mat(file, '/MAT');
% $$$ figure
% $$$ spy(mat)
f = h5read(file,'/f'); f1d=reshape(f,n,1);
v = h5read(file,'/v'); v1d=reshape(v,n,1);
u = h5read(file,'/u'); u1d=reshape(u,n,1);
% $$$ udirect1d = mat\f1d;
% $$$ udirect=reshape(udirect1d,n1,n2);
% $$$ fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
% $$$ fprintf('Error of direct solution = %.3e\n', norm(udirect1d- ...
% $$$ v1d));
figure
subplot(221)
pcolor(x,y,v'-u')
xlabel('X'); ylabel('Y');
shading interp
colorbar
title('Error on the finest grid')
subplot(222)
[yy,iy] = max(abs(v),[],2);
[xx,ix] = max(yy);
iy0=iy(ix);
str=sprintf('Solution at y = %.4f', y(iy0));
plot(x, v(:,iy0),x, u(:,iy0),'o')
xlabel('x'); ylabel(str);
grid on
legend('Analytic Solution', 'Computed Solution')
title(title_str)
subplot(223)
[xx,ix] = max(abs(v));
[yy,iy] = max(xx);
ix0=ix(iy);
str=sprintf('Solution at x = %.4f', x(ix0));
plot(y, v(ix0,:),y, u(ix0,:),'o')
xlabel('y'); ylabel(str);
grid on
figure
resid_it=h5read(file, '/resid');
err_it=h5read(file, '/error');
nits = size(resid_it,1)-1;
it=0:nits;
subplot(211)
semilogy(it,resid_it,'o-')
xlabel('Iterations')
ylabel('Residual norm')
grid on
title(title_str)
subplot(212)
semilogy(it, err_it, 'o-')
xlabel('Iterations')
ylabel('Norm of Discretization Error')
grid on
diff --git a/matlab/relax.m b/matlab/relax.m
index 37d3fca..bfbaa29 100644
--- a/matlab/relax.m
+++ b/matlab/relax.m
@@ -1,55 +1,55 @@
%
% @file relax.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
load relax.mat
figure
subplot(221)
semilogy(jac_1(:,1),jac_1(:,2),gs_1(:,1),gs_1(:,2))
grid on
legend('Jacobi', 'GS')
xlabel('Iterations'); ylabel('Error')
title('NX=32, P=1')
subplot(222)
semilogy(jac_3(:,1),jac_3(:,2),gs_3(:,1),gs_3(:,2))
grid on
legend('Jacobi', 'GS')
xlabel('Iterations'); ylabel('Error')
title('NX=32, P=3')
subplot(223)
semilogy(jac_1(:,1),jac_1(:,4),gs_1(:,1),gs_1(:,4))
grid on
legend('Jacobi', 'GS')
xlabel('Iterations'); ylabel('Discretization error')
title('NX=32, P=1')
subplot(224)
semilogy(jac_3(:,1),jac_3(:,4),gs_3(:,1),gs_3(:,4))
grid on
legend('Jacobi', 'GS')
xlabel('Iterations'); ylabel('Discretization error')
title('NX=32, P=3')
diff --git a/matlab/stencil_mat.m b/matlab/stencil_mat.m
index 4283768..6c7030c 100644
--- a/matlab/stencil_mat.m
+++ b/matlab/stencil_mat.m
@@ -1,49 +1,49 @@
%
% @file stencil_mat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [mata, diag] = stencil_mat(file, dset)
id = double(h5read(file, strcat(dset,'/dists')));
val = h5read(file, strcat(dset,'/val'));
n1 = size(val,1);
n2 = size(val,2);
n = n1*n2;
ndiag = size(val,3);
dists = id(:,1) + n1*id(:,2);
val = reshape(val,n,ndiag);
%% Shift the off-diagonals %%
for k=1:length(dists)
d=dists(k);
if d < 0
val(1:n+d,k) = val(1-d:n,k);
elseif d > 0
val(n:-1:d+1,k) = val(n-d:-1:1,k);
end
end
mata = spdiags(val, dists, n,n);
if nargout == 2
idiag = find(dists==0);
diag = val(:,idiag);
end
diff --git a/matlab/tcdsmat.m b/matlab/tcdsmat.m
index 5331df7..e5a974a 100644
--- a/matlab/tcdsmat.m
+++ b/matlab/tcdsmat.m
@@ -1,91 +1,91 @@
%
% @file tcdsmat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
mat='/MAT1';
gbmat;
clear S gb_mat;
file='tcdsmat.h5';
nx=hdf5read(file,'/NX');
ny=hdf5read(file,'/NY');
dists=hdf5read(file,strcat(mat,'/dists'));
vals=hdf5read(file,strcat(mat,'/vals'));
rowv=hdf5read(file,strcat(mat,'/rowv'));
colh=hdf5read(file,strcat(mat,'/colh'));
n=hdf5read(file,strcat(mat,'/RANK'));
nd=hdf5read(file,strcat(mat,'/NDIAGS'));
err=zeros(n,nd);
% Diagonal balancing of matrix
dbal = 1./sqrt(diag(a));
a = diag(dbal)*a*diag(dbal);
% Check CDS mat except row ny and column ny
for k=1:nd
d=dists(k);
i1=max(1,1-d); i2=min(n,n-d);
fprintf(1,'%8d %8d %8d\n',d,i1,i2);
for i=i1:i2
j=i+d;
if (i~=ny && j~=ny)
err(i,k) = a(i,j)-vals(i,k);
end
end
end
fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
% Check row ny and j .ne. ny
i=ny;
bw0=size(rowv,1);
for k=1:nd
d=dists(k);
j=i+d;
if ((j >= ny+1) && (j <= bw0))
err(i,k)=a(i,j)-rowv(j);
end
end
fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
% Check column ny
j=ny;
for k=1:nd
d=dists(k);
i=j-d;
if ((i >= ny+1) && (i <= bw0))
err(i,k)=a(i,j)-colh(i);
end
end
fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
% Check RHS
rhs=hdf5read(file,'/RHS');
fprintf('Err in RHS: %8.3e\n', max(max(abs(rhs-rhs0))))
% Check SOL
sol=hdf5read(file,'/SOL');
err= sol-sol0;
fprintf('Err SOL: %8.3e\n', max(max(abs(err))));
diff --git a/matlab/tcdsmat_plot_sol.m b/matlab/tcdsmat_plot_sol.m
index 0754929..68234c5 100644
--- a/matlab/tcdsmat_plot_sol.m
+++ b/matlab/tcdsmat_plot_sol.m
@@ -1,79 +1,79 @@
%
% @file tcdsmat_plot_sol.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='tcdsmat.h5'
%
% Get data from data sets
%
r=hdf5read(file,'/xgrid');
t=hdf5read(file,'/ygrid');
sol=hdf5read(file,'/sol')';
solexact=hdf5read(file,'/solana')';
err=hdf5read(file,'/errors')';
%
% Attributes
%
NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
NIDBAS1=hdf5read(file,'/NIDBAS1');
NIDBAS2=hdf5read(file,'/NIDBAS2');
MBESS=hdf5read(file,'/MBESS');
LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
NIDBAS1, NIDBAS2, MBESS);
attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
[R,T]=meshgrid(r,t);
x = R.*cos(T); y= R.*sin(T);
figure
subplot(221)
pcolor(double(r),double(t),double(sol));
shading interp
xlabel(x_ann); ylabel(y_ann)
title(LABEL)
colorbar
subplot(222)
pcolor(double(x),double(y),double(sol))
shading interp
hold off, axis image
xlabel('X'); ylabel('Y')
title('X-Y plane')
colorbar
subplot(223)
surfc(double(x),double(y),double(sol))
xlabel('X'); ylabel('Y');
title(sol_ann)
subplot(224)
surfc(double(x),double(y),double(err))
xlabel('X'); ylabel('Y');
title(err_ann)
diff --git a/matlab/test_intergrid.m b/matlab/test_intergrid.m
index 4d89561..5b22016 100644
--- a/matlab/test_intergrid.m
+++ b/matlab/test_intergrid.m
@@ -1,102 +1,102 @@
%
% @file test_intergrid.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='test_intergrid0.h5'; end
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
lx=h5readatt(file,'/','LX');
ly=h5readatt(file,'/','LY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
nlevels=h5readatt(file,'/','LEVELS');
title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, LEVELS=%d, KX=%d, KY=%d'], ...
nx,ny,lx,ly,nlevels,kx,ky);
if nlevels ~= 2
disp 'levels should be 2!'
return
end
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
for l=1:2
mglevel=sprintf('/mglevels/level.%.2d', l);
x=h5read(file,strcat(mglevel,'/x'));
y=h5read(file,strcat(mglevel,'/y'));
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
figure
subplot(221)
[yy,iy] = max(abs(f),[],2); [xx,ix] = max(yy); iy0=iy(ix);
str=sprintf('f at y = %.4f', y(iy0));
plot(x, f(:,iy0),'o-')
xlabel('x'); ylabel(str);
grid on
title(title_str)
subplot(222)
[xx,ix] = max(abs(f)); [yy,iy] = max(xx); ix0=ix(iy);
str=sprintf('f at x = %.4f', x(ix0));
plot(y, f(ix0,:),'o-')
xlabel('y'); ylabel(str);
grid on
title(title_str)
subplot(223)
[yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix);
str=sprintf('v at y = %.4f', y(iy0));
plot(x, v(:,iy0),'ro-')
xlabel('x'); ylabel(str);
grid on
title(title_str)
subplot(224)
[xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy);
str=sprintf('v at x = %.4f', x(ix0));
plot(y, v(ix0,:),'ro-')
xlabel('y'); ylabel(str);
grid on
title(title_str)
if l==1
ffine=f; vfine=v;
else
fcoarse=f; vcoarse=v;
end
end
%% Check
err_restriction = matpx'*ffine*matpy./4 - fcoarse;
err_prolong = matpx*vcoarse*matpy' - vfine;
max(max(abs(err_restriction)))
max(max(abs(err_prolong)))
diff --git a/matlab/test_jacobi.m b/matlab/test_jacobi.m
index 2a71117..875f559 100644
--- a/matlab/test_jacobi.m
+++ b/matlab/test_jacobi.m
@@ -1,96 +1,96 @@
%
% @file test_jacobi.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='test_jacobi.h5'; end
x = h5read(file, '/xgrid');
y = h5read(file, '/ygrid');
[X,Y]=meshgrid(y,x);
n1=size(x,1);
n2=size(y,1);
n=n1*n2
mat = stencil_mat(file, '/MAT');
% $$$ figure
% $$$ spy(mat)
f = h5read(file,'/f'); f1d=reshape(f,n,1);
v = h5read(file,'/v'); v1d=reshape(v,n,1);
u = h5read(file,'/u'); u1d=reshape(u,n,1);
resids = h5read(file,'/resids');
errs = h5read(file,'/errs');
udirect1d = mat\f1d;
udirect=reshape(udirect1d,n1,n2);
fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
fprintf('Error of direct solution = %.3e\n', norm(udirect1d-v1d));
figure
subplot(211)
[yy,iy] = max(abs(v),[],2);
[xx,ix] = max(yy);
iy0=iy(ix);
str=sprintf('Solution at y = %.4f', y(iy0));
plot(x, v(:,iy0),x, u(:,iy0),'o')
xlabel('x'); ylabel(str);
grid on
legend('Analytic Solution', 'Computed Solution')
subplot(212)
[xx,ix] = max(abs(v));
[yy,iy] = max(xx);
ix0=ix(iy);
str=sprintf('Solution at x = %.4f', x(ix0));
plot(y, v(ix0,:),y, u(ix0,:),'o')
xlabel('y'); ylabel(str);
grid on
% $$$ figure
% $$$ subplot(321)
% $$$ surf(x,y,v'); title('Exact solution')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(322)
% $$$ surf(x,y,f'); title('RHS')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(323)
% $$$ surf(x,y,u'); title('Num. solution')
% $$$ subplot(324)
% $$$ surf(x,y,udirect'); title('Direct. solution')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(325)
% $$$ surf(x,y,resids'); title('Residuals')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(326)
% $$$ surf(x,y,errs'); title('Errors')
% $$$ xlabel('X'); ylabel('Y')
resid_it=h5read(file, '/resid');
err_it=h5read(file, '/error');
nits = size(resid_it,1)-1;
it=0:nits;
figure
semilogy(it,resid_it, it, err_it)
legend('Residual norm', 'Discretization error')
xlabel('Iterations')
grid on
diff --git a/matlab/test_jacobig.m b/matlab/test_jacobig.m
index a0e31d2..39dee82 100644
--- a/matlab/test_jacobig.m
+++ b/matlab/test_jacobig.m
@@ -1,84 +1,84 @@
%
% @file test_jacobig.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='test_jacobig.h5'; end
x = h5read(file, '/xgrid');
y = h5read(file, '/ygrid');
[X,Y]=meshgrid(y,x);
n1=size(x,1);
n2=size(y,1);
n=n1*n2
mat = stencil_mat(file, '/MAT');
% $$$ figure
% $$$ spy(mat)
f = h5read(file,'/f'); f1d=reshape(f,n,1);
v = h5read(file,'/v'); v1d=reshape(v,n,1);
u = h5read(file,'/u'); u1d=reshape(u,n,1);
udirect1d = mat\f1d;
udirect=reshape(udirect1d,n1,n2);
fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
fprintf('Error of direct solution = %.3e\n', norm(udirect1d- ...
v1d));
figure
subplot(221)
pcolor(x,y,v'-u')
xlabel('X'); ylabel('Y');
shading interp
colorbar
title('Error')
subplot(223)
[yy,iy] = max(abs(v),[],2);
[xx,ix] = max(yy);
iy0=iy(ix);
str=sprintf('Solution at y = %.4f', y(iy0));
plot(x, v(:,iy0),x, u(:,iy0),'o')
xlabel('x'); ylabel(str);
grid on
legend('Analytic Solution', 'Computed Solution')
subplot(224)
[xx,ix] = max(abs(v));
[yy,iy] = max(xx);
ix0=ix(iy);
str=sprintf('Solution at x = %.4f', x(ix0));
plot(y, v(ix0,:),y, u(ix0,:),'o')
xlabel('y'); ylabel(str);
grid on
resid_it=h5read(file, '/resid');
err_it=h5read(file, '/error');
nits = size(resid_it,1)-1;
it=0:nits;
subplot(222)
semilogy(it,resid_it, it, err_it)
legend('Residual norm', 'Discretization error')
xlabel('Iterations')
grid on
diff --git a/matlab/test_mg.m b/matlab/test_mg.m
index d0ed10b..5f0b5fd 100644
--- a/matlab/test_mg.m
+++ b/matlab/test_mg.m
@@ -1,164 +1,164 @@
%
% @file test_mg.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_mg.h5';
%
nx=h5readatt(file,'/','NX');
nidbas=h5readatt(file,'/','NIDBAS');
relax=h5readatt(file,'/','RELAX');
levels=h5readatt(file,'/','LEVELS');
nu1=h5readatt(file,'/','NU1');
nu2=h5readatt(file,'/','NU2');
alpha=h5readatt(file,'/','ALPHA');
omega=h5readatt(file,'/','OMEGA');
if alpha == 0
kx=h5readatt(file,'/','KMODE');
title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ...
nx,nidbas,kx,relax, omega, levels, nu1, nu2);
else
modem=h5readatt(file,'/','MODEM');
modep=h5readatt(file,'/','MODEP');
title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ...
nx,nidbas,modem,modep,relax, omega, levels, nu1, nu2);
end
%
% Read matrices at coarset grid
%
for lev=2:levels
%
% FE mat at fine grid
mglevel=sprintf('/mglevels/level.%.2d', lev-1);
dset=strcat(mglevel,'/mata');
mata_f = gb_mat(file, dset);
%
% FE mat at coarse grid
mglevel=sprintf('/mglevels/level.%.2d', lev);
dset=strcat(mglevel,'/mata');
mata_c = gb_mat(file, dset);
%
% Prolong mat
dset=strcat(mglevel,'/matp');
matp=h5read(file,dset);
%
% Check
fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
- mata_c))
end
%
% Iterations
dset='/Iterations/';
err=h5read(file, strcat(dset,'errors'));
disc_err=h5read(file, strcat(dset,'disc_errors'));
resid=h5read(file, strcat(dset,'residues'));
its=0:1:size(err,1)-1;
figure
subplot(212)
semilogy(its,resid,'o-', its, disc_err,'h-')
legend('Residue', 'Error')
grid on
xlabel('Iterations'); ylabel('Norm od residue and error');
title(title_str);
%
% Plot grid values at the last iteration
xgrid=h5read(file, '/Iterations/xgrid');
u_calc=h5read(file, '/Iterations/u_calc');
u_exact=h5read(file, '/Iterations/u_exact');
u_direct=h5read(file, '/Iterations/u_direct');
subplot(211)
plot(xgrid, u_exact, xgrid,u_calc,'o')
legend('Analytic', 'Calculated')
xlabel('X');ylabel('Grid values of solution')
grid on
title(title_str);
%
%
% $$$ mglevel=sprintf('/mglevels/level.%.2d', 1);
% $$$ dset=strcat(mglevel,'/mata');
% $$$ A = gb_mat(file, dset);
% $$$ D = diag(diag(A),0);
% $$$ n=rank(A);
% $$$ k=1:1:n;
% $$$ if relax(1:2) == 'ja'
% $$$ %
% $$$ % Compute eigenvalues of Rj = D^(-1)*A
% $$$ %
% $$$ [V, l] = eig(A,D);
% $$$ [lambda, iss] = sort(diag(l));
% $$$ V = V(1:end,iss);
% $$$ %
% $$$ % Spectral radius of Jacobi iteration matrix
% $$$ % R(omega) = max |1-omega*lambda|
% $$$ %
% $$$ om=0:0.01:1;
% $$$ for i=1:size(om,2)
% $$$ rho(i) = max(abs(1-om(i).*lambda));
% $$$ end
% $$$ fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda)))
% $$$
% $$$ figure
% $$$ subplot(211)
% $$$ plot(k, 1-omega*lambda, 'o-')
% $$$ xlabel('mode k'); ylabel('Eigen value of inv(D)*A')
% $$$ grid on
% $$$ title(title_str)
% $$$ subplot(212)
% $$$ plot(om, rho)
% $$$ xlabel('\omega'); ylabel('Spectral Radius')
% $$$ grid on
% $$$
% $$$ for i=1:n
% $$$ k=mod(i-1,4*5)+1;
% $$$ if k==1
% $$$ figure
% $$$ title(title_str)
% $$$ end
% $$$ subplot(4,5,k)
% $$$ str = sprintf('Mode = %d, ||R|| = %.3f', i, 1-omega*lambda(i));
% $$$ plot(V(:,i)); grid on
% $$$ title(str)
% $$$ end
% $$$ elseif relax(1:2) == 'gs'
% $$$ %
% $$$ % Spectral radius of GS Iteration Matrix
% $$$ % Rg = (D-L)^(-1) * U
% $$$ %
% $$$ B = tril(A,0); % D-L
% $$$ lambda = eig(-triu(A,1),B);
% $$$ fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
% $$$ figure
% $$$ plot(lambda, 'o', 'MarkerSize', 6)
% $$$ axis equal
% $$$ grid on
% $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
% $$$ title(title_str)
% $$$ end
diff --git a/matlab/test_mg2d.m b/matlab/test_mg2d.m
index cac6b9d..279ad29 100644
--- a/matlab/test_mg2d.m
+++ b/matlab/test_mg2d.m
@@ -1,102 +1,102 @@
%
% @file test_mg2d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_mg2d.h5';
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
relax=h5readatt(file,'/','RELAX');
nlevels=h5readatt(file,'/','LEVELS');
nu1=h5readatt(file,'/','NU1');
nu2=h5readatt(file,'/','NU2');
mu=h5readatt(file,'/','MU');
title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ...
nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky);
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
% $$$ figure
% $$$ spy(mata)
%
% Solutions at the finest grid
%
x=h5read(file,'/solutions/xg');
y=h5read(file,'/solutions/yg');
sol_anal=h5read(file,'/solutions/anal');
sol_calc=h5read(file,'/solutions/calc');
% $$$ figure
% $$$ subplot(211)
% $$$ surf(x,y,sol_anal')
% $$$ xlabel('X'); ylabel('Y');
% $$$ title('Analytical solution on the finest grid')
% $$$ subplot(212)
% $$$ surf(x,y,sol_calc')
% $$$ xlabel('X'); ylabel('Y');
% $$$ title('Calculated solution on the finest grid')
%
% Iterations
%
dset='/Iterations/';
disc_err=h5read(file, strcat(dset,'disc_errors'));
resid=h5read(file, strcat(dset,'residues'));
its=0:1:size(resid,1)-1;
figure
subplot(211)
semilogy(its,resid,'o-')
grid on
xlabel('Iterations'); ylabel('Norm of residue');
title(title_str);
subplot(212)
semilogy(its,disc_err,'h-')
grid on
xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/test_mg2d_cyl.m b/matlab/test_mg2d_cyl.m
index 3258da6..86f6ec5 100644
--- a/matlab/test_mg2d_cyl.m
+++ b/matlab/test_mg2d_cyl.m
@@ -1,141 +1,141 @@
%
% @file test_mg2d_cyl.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_mg2d_cyl.h5';
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
modem=h5readatt(file,'/','MODEM');
modep=h5readatt(file,'/','MODEP');
relax=h5readatt(file,'/','RELAX');
nlevels=h5readatt(file,'/','LEVELS');
nu1=h5readatt(file,'/','NU1');
nu2=h5readatt(file,'/','NU2');
mu=h5readatt(file,'/','MU');
title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
'nu2=%d, mu=%d, LEVELS=%d, MODEM=%d, MODEP=%d'], ...
nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,modem,modep);
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
% $$$ figure
% $$$ spy(mata)
%
% Solutions at the finest grid
%
x=h5read(file,'/solutions/xg');
y=h5read(file,'/solutions/yg');
sol_anal=h5read(file,'/solutions/anal');
sol_calc=h5read(file,'/solutions/calc');
sol_direct=h5read(file,'/solutions/direct');
nx=int32(size(x,1));
ny=int32(size(y,1));
figure
subplot(211)
surf(x,y,sol_anal')
xlabel('X'); ylabel('Y');
title('Analytical solution on the finest grid')
subplot(212)
surf(x,y,sol_calc')
% $$$ surf(x,y, (abs(sol_calc'-sol_anal')))
xlabel('X'); ylabel('Y');
title('Calculated solution on the finest grid')
figure
subplot(211)
plot(x, sol_anal(:,ny/2),x, sol_calc(:,ny/2),'o')
xlabel('r');
grid on
legend('Analytic Solution', 'MG Solution')
title(title_str)
subplot(212)
if modem == 0
plot(y, sol_anal(1,:),y, sol_calc(1,:),'o')
else
plot(y, sol_anal(nx/2,:),y, sol_calc(nx/2,:),'o')
end
xlabel('\theta');
grid on
title(title_str)
% $$$ figure
% $$$ subplot(211)
% $$$ semilogy(x, abs(sol_anal(:,ny/2)-sol_calc(:,ny/2)),'o')
% $$$ xlabel('r'); ylabel('Error')
% $$$ grid on
% $$$ title(title_str)
% $$$ subplot(212)
% $$$ if modem == 0
% $$$ semilogy(y, abs(sol_anal(1,:)-sol_calc(1,:)),'o')
% $$$ else
% $$$ semilogy(y, abs(sol_anal(nx/2,:)-sol_calc(nx/2,:)),'o')
% $$$ end
% $$$ xlabel('\theta'); ylabel('Error');
% $$$ grid on
% $$$ title(title_str)
% $$$
%
% Iterations
%
dset='/Iterations/';
disc_err=h5read(file, strcat(dset,'disc_errors'));
resid=h5read(file, strcat(dset,'residues'));
its=0:1:size(resid,1)-1;
figure
subplot(211)
semilogy(its,resid,'o-')
grid on
xlabel('Iterations'); ylabel('Norm of residue');
title(title_str);
subplot(212)
semilogy(its,disc_err,'h-')
grid on
xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/test_mgp.m b/matlab/test_mgp.m
index 4f5fb1b..69d5214 100644
--- a/matlab/test_mgp.m
+++ b/matlab/test_mgp.m
@@ -1,98 +1,98 @@
%
% @file test_mgp.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_mgp.h5';
%
nx=h5readatt(file,'/','NX');
nidbas=h5readatt(file,'/','NIDBAS');
relax=h5readatt(file,'/','RELAX');
levels=h5readatt(file,'/','LEVELS');
nu1=h5readatt(file,'/','NU1');
nu2=h5readatt(file,'/','NU2');
title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d, nu1 = %d, nu2 = %d', nx, nidbas, levels, nu1, nu2);
%
% Read matrices at coarset grid
%
for lev=2:levels
%
% FE mat at fine grid
mglevel=sprintf('/mglevels/level.%.2d', lev-1);
dset=strcat(mglevel,'/mata');
mata_f=h5read(file,dset);
n=size(mata_f,1);
%
% FE mat at coarse grid
mglevel=sprintf('/mglevels/level.%.2d', lev);
dset=strcat(mglevel,'/mata');
mata_c=h5read(file,dset);
n=size(mata_c,1);
%
% Prolong mat
dset=strcat(mglevel,'/matp');
matp=h5read(file,dset);
%
% Check
fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
- mata_c))
end
%
% Iterations
dset='/Iterations/';
err=h5read(file, strcat(dset,'errors'));
disc_err=h5read(file, strcat(dset,'disc_errors'));
resid=h5read(file, strcat(dset,'residues'));
its=0:1:size(err,1)-1;
figure
subplot(221)
semilogy(its,resid,'o-', its, disc_err,'h-')
legend('Residue', 'Error')
grid on
xlabel('Iterations'); ylabel('Norm od residue and error');
title(title_str);
%
% Plot grid values at the last iteration
xgrid=h5read(file, '/Iterations/xgrid');
u_calc=h5read(file, '/Iterations/u_calc');
u_exact=h5read(file, '/Iterations/u_exact');
u_direct=h5read(file, '/Iterations/u_direct');
subplot(222)
plot(xgrid,u_exact, xgrid,u_calc,'o')
xlabel('X');ylabel('Grid values of solution')
grid on
title(title_str);
subplot(223)
semilogy(xgrid,abs(u_calc-u_direct))
xlabel('X');ylabel('Diff with direct solution')
grid on
title(title_str);
subplot(224)
semilogy(xgrid,abs(u_calc-u_exact))
xlabel('X');ylabel('Diff with exact solution')
grid on
title(title_str);
diff --git a/matlab/test_relax.m b/matlab/test_relax.m
index 938b605..8d8fbbb 100644
--- a/matlab/test_relax.m
+++ b/matlab/test_relax.m
@@ -1,157 +1,157 @@
%
% @file test_relax.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_relax.h5';
%
nx=h5readatt(file,'/','NX');
alpha=h5readatt(file,'/','ALPHA');
nidbas=h5readatt(file,'/','NIDBAS');
relax=h5readatt(file,'/','RELAX');
omega=h5readatt(file,'/','OMEGA');
if alpha == 0
kx=h5readatt(file,'/','KX');
title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f', ...
nx,nidbas,kx,relax, omega);
else
modem=h5readatt(file,'/','MODEM');
modep=h5readatt(file,'/','MODEP');
title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f', ...
nx,nidbas,modem,modep,relax, omega);
end
%
% Solutions at the finest grid
%
x=h5read(file,'/solutions/xg');
sol_direct=h5read(file,'/solutions/direct');
sol_anal=h5read(file,'/solutions/anal');
sol_calc=h5read(file,'/solutions/calc');
figure
subplot(211)
plot(x, sol_anal, x, sol_calc, 'o')
legend('Analytic', 'Calculated')
xlabel('X')
grid on
title(title_str);
%
% Relaxations
%
errdisc=h5read(file,'/relaxation/errdisc');
resid=h5read(file,'/relaxation/resid');
its=0:1:size(errdisc)-1;
subplot(212)
semilogy(its,errdisc,its,resid)
legend('Discretisation Error', 'Residual Norm')
xlabel('Iterations')
grid on
title(title_str)
%
% FE Matrix
%
dset = '/MATA/';
A = gb_mat(file, dset);
D = diag(diag(A),0);
n=rank(A);
k=1:1:n;
if relax(1:2) == 'ja'
%
% Compute eigenvalues of Rj = D^(-1)*A
%
[V, l] = eig(A,D);
[lambda, iss] = sort(diag(l));
V = V(1:end,iss);
%
% Spectral radius of Jacobi iteration matrix
% R(omega) = max |1-omega*lambda|
%
om=0:0.01:1;
for i=1:size(om,2)
rho(i) = max(abs(1-om(i).*lambda));
end
fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda)))
figure
subplot(211)
plot(k, 1-omega*lambda, 'o-')
xlabel('mode k'); ylabel('Eigen value of inv(D)*A')
grid on
title(title_str)
subplot(212)
plot(om, rho)
omega_c = 2.0/max(lambda);
str = sprintf('Critical omega = %.3f', omega_c)
title(str)
xlabel('\omega'); ylabel('Spectral Radius')
grid on
elseif relax(1:2) == 'gs'
%
% Spectral radius of GS Iteration Matrix
% Rg = -(D+L)^(-1) * U
%
B = tril(A,0); % D+L
% $$$ [V, l] = eig(-triu(A,1),B); lambda=diag(l);
% $$$ [V, l] = eig(B,A); lambda = 1 - 1./diag(l);
[V, l] = eig(A,B); lambda = 1 - diag(l);
[lambda, iss] = sort(lambda, 'descend');
V = V(1:end,iss);
fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
figure
subplot(211)
plot(real(lambda), imag(lambda), 'o')
xlabel('Real of eigenvalues'); ylabel(['Imag of ' ...
'eigenvalues'])
axis equal
title(title_str)
grid on
subplot(212)
plot(k, abs(lambda), 'o-')
xlabel('Mode'); ylabel('eigenvalues')
grid on
end
%
% Plot eigenvectors
neig=size(lambda,1);
for i=1:neig
k=mod(i-1,4*5)+1;
if k==1
figure
title(title_str)
end
subplot(4,5,k)
if relax(1:2) == 'ja'
str = sprintf('Mode = %d, ||R|| = %.4f', i, 1-omega* ...
lambda(i));
else
str = sprintf('Mode = %d, ||R|| = %.4f', i, ...
lambda(i));
end
plot(V(:,i)); grid on
title(str)
end
diff --git a/matlab/test_relax2d.m b/matlab/test_relax2d.m
index 3e5f778..2370781 100644
--- a/matlab/test_relax2d.m
+++ b/matlab/test_relax2d.m
@@ -1,86 +1,86 @@
%
% @file test_relax2d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_relax2d.h5';
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
levels=h5readatt(file,'/','LEVELS');
relax=h5readatt(file,'/','RELAX');
title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d, relax=%s', ...
nx,ny,nidbas1,nidbas2,kx,ky,relax);
%
% Prolongation matrices at the coarsest grid
%
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
x=h5read(file,strcat(mglevel,'/x'));
y=h5read(file,strcat(mglevel,'/y'));
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
figure
spy(mata)
%
% Solutions at te finest grid
%
sol_direct=h5read(file,'/solutions/direct');
sol_anal=h5read(file,'/solutions/anal');
figure
surf(x,y,sol_direct')
xlabel('X'); ylabel('Y');
%
% Relaxations
%
errdisc=h5read(file,'/relaxation/errdisc');
resid=h5read(file,'/relaxation/resid');
its=0:1:size(errdisc)-1;
figure
semilogy(its,errdisc,its,resid)
legend('Discretisation Error', 'Residual Norm')
xlabel('Iterations')
grid on
title(title_str)
diff --git a/matlab/test_relax2d_cyl.m b/matlab/test_relax2d_cyl.m
index e01c146..78f4c4c 100644
--- a/matlab/test_relax2d_cyl.m
+++ b/matlab/test_relax2d_cyl.m
@@ -1,183 +1,183 @@
%
% @file test_relax2d_cyl.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_relax2d_cyl.h5';
%
nx=h5readatt(file,'/','NX');
ny=h5readatt(file,'/','NY');
modem=h5readatt(file,'/','MODEM');
modep=h5readatt(file,'/','MODEP');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
levels=h5readatt(file,'/','LEVELS');
omega=h5readatt(file,'/','OMEGA');
relax=h5readatt(file,'/','RELAX');
if relax(1:2) == 'ja'
title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s, omega=%.3f', ...
nx,ny,nidbas1,nidbas2,modem,modep,relax, omega);
else
title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s,', ...
nx,ny,nidbas1,nidbas2,modem,modep,relax);
end
%
% Prolongation matrices at the coarsest grid
%
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% Solutions at the finest grid
%
mglevel=sprintf('/mglevels/level.%.2d', 1);
x=h5read(file,strcat(mglevel,'/x'));
y=h5read(file,strcat(mglevel,'/y'));
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
sol_direct=h5read(file,'/solutions/direct');
sol_relax=h5read(file,'/solutions/relax');
sol_anal=h5read(file,'/solutions/anal');
% $$$ figure
% $$$ surf(x,y,sol_direct')
% $$$ xlabel('r'); ylabel('\theta');
% $$$ title(title_str)
% $$$ figure
% $$$ subplot(211)
% $$$ plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o')
% $$$ xlabel('r'); ylabel('Direct solution')
% $$$ grid on
% $$$ title(title_str)
% $$$ subplot(212)
% $$$ if modem == 0
% $$$ plot(x, sol_anal(1,:),x, sol_direct(1,:),'o')
% $$$ else
% $$$ plot(x, sol_anal(nx/2,:),x, sol_direct(nx/2,:),'o')
% $$$ end
% $$$ xlabel('\theta'); ylabel('Direct solution')
% $$$ grid on
% $$$ title(title_str)
%
% Relaxations
%
errdisc=h5read(file,'/relaxation/errdisc');
resid=h5read(file,'/relaxation/resid');
its=0:1:size(errdisc)-1;
figure
semilogy(its,errdisc,its,resid)
legend('Discretisation Error', 'Residual Norm')
xlabel('Iterations')
grid on
title(title_str)
% $$$ figure
% $$$ subplot(211)
% $$$ plot(x, sol_anal(:,ny/2),x, sol_relax(:,ny/2),'o')
% $$$ xlabel('r'); ; ylabel('Relaxed solution')
% $$$ grid on
% $$$ title(title_str)
% $$$ subplot(212)
% $$$ if modem == 0
% $$$ plot(x, sol_anal(1,:),x, sol_relax(1,:),'o')
% $$$ else
% $$$ plot(x, sol_anal(nx/2,:),x, sol_relax(nx/2,:),'o')
% $$$ end
% $$$ xlabel('\theta'); ylabel('Relaxed solution')
% $$$ grid on
% $$$ title(title_str)
figure
subplot(211)
plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o')
xlabel('r'); ; ylabel('Direct solution')
grid on
title(title_str)
subplot(212)
plot(x, sol_anal(1,:),x, sol_direct(1,:),'o')
xlabel('\theta'); ylabel('Direct solution at the axis')
grid on
title(title_str)
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
n=size(diag,1);
% $$$ figure
% $$$ spy(mata)
% $$$ title(title_str)
if relax(1:2) == 'ja'
%
% Eigenvalues of inv(D)*A
%
matd=spdiags(diag,0,n,n);
lambda = eigs(mata, matd, n);
om=0:0.01:0.8;
clear rho;
for i=1:size(om,2)
rho(i) = max(abs(1-om(i).*lambda));
end
figure
subplot(211)
plot(lambda,'o')
ylabel('Eigenvalue of inv(D)*A')
grid on
title(title_str)
subplot(212)
plot(om,rho)
xlabel('\omega'); ylabel('\rho')
grid on
omega_c = 2.0/max(lambda);
lambda_min = eigs(mata, matd, 1, 'SM');
fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', abs(1-omega*lambda_min))
str = sprintf('Critical omega = %.3f', omega_c)
title(str)
else
%
% Spectral radius of GS Iteration Matrix
% Rg = -(D+L)^(-1) * U
%
matl= tril(mata,0); % D+L
lambda = eigs(-triu(mata,1),matl);
fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
figure
plot(lambda, 'o')
axis equal
grid on
xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
title(title_str)
end
diff --git a/matlab/test_stencil.m b/matlab/test_stencil.m
index db11cc9..174267a 100644
--- a/matlab/test_stencil.m
+++ b/matlab/test_stencil.m
@@ -1,62 +1,62 @@
%
% @file test_stencil.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='test_stencil.h5'; end
x = h5read(file, '/xgrid');
y = h5read(file, '/ygrid');
b1 = h5read(file, '/barr1');
b2 = h5read(file, '/barr2');
b3 = h5read(file, '/barr3');
n1=size(x,1);
n2=size(y,1);
n=n1*n2
mat = stencil_mat(file, '/MAT');
% $$$ figure
% $$$ spy(mat)
fprintf('||B1|| = %e\n', norm(reshape(b1,n,1)));
fprintf('||B2|| = %e\n', norm(reshape(b2,n,1)));
fprintf('||B3|| = %e\n', norm(reshape(b3,n,1)));
% $$$ figure
% $$$ subplot(321)
% $$$ surf(x,y,v'); title('Exact solution')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(322)
% $$$ surf(x,y,f'); title('RHS')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(323)
% $$$ surf(x,y,u'); title('Num. solution')
% $$$ subplot(324)
% $$$ surf(x,y,udirect'); title('Direct. solution')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(325)
% $$$ surf(x,y,resids'); title('Residuals')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(326)
% $$$ surf(x,y,errs'); title('Errors')
% $$$ xlabel('X'); ylabel('Y')
diff --git a/matlab/test_stencilg.m b/matlab/test_stencilg.m
index fb7dccd..fae5111 100644
--- a/matlab/test_stencilg.m
+++ b/matlab/test_stencilg.m
@@ -1,63 +1,63 @@
%
% @file test_stencilg.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
if ~exist('file'), file='test_stencilg.h5'; end
x = h5read(file, '/xgrid');
y = h5read(file, '/ygrid');
a = h5read(file, '/arr');
b1 = h5read(file, '/barr1');
b2 = h5read(file, '/barr2');
b3 = h5read(file, '/barr3');
n1=size(x,1);
n2=size(y,1);
n=n1*n2
mat = stencil_mat(file, '/MAT');
% $$$ figure
% $$$ spy(mat)
fprintf('||B1|| = %e\n', norm(reshape(b1,n,1)));
fprintf('||B2|| = %e\n', norm(reshape(b2,n,1)));
fprintf('||B3|| = %e\n', norm(reshape(b3,n,1)));
% $$$ figure
% $$$ subplot(321)
% $$$ surf(x,y,v'); title('Exact solution')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(322)
% $$$ surf(x,y,f'); title('RHS')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(323)
% $$$ surf(x,y,u'); title('Num. solution')
% $$$ subplot(324)
% $$$ surf(x,y,udirect'); title('Direct. solution')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(325)
% $$$ surf(x,y,resids'); title('Residuals')
% $$$ xlabel('X'); ylabel('Y')
% $$$ subplot(326)
% $$$ surf(x,y,errs'); title('Errors')
% $$$ xlabel('X'); ylabel('Y')
diff --git a/matlab/test_transf2d.m b/matlab/test_transf2d.m
index 506c5ad..9936481 100644
--- a/matlab/test_transf2d.m
+++ b/matlab/test_transf2d.m
@@ -1,105 +1,105 @@
%
% @file test_transf2d.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_transf2d.h5';
%
nx(1)=h5readatt(file,'/','NX');
ny(1)=h5readatt(file,'/','NY');
kx=h5readatt(file,'/','KX');
ky=h5readatt(file,'/','KY');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
nlevels=h5readatt(file,'/','LEVELS');
title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d', ...
nx(1),ny(1),nidbas1,nidbas2,kx,ky);
%
% Grid sizes on each levels
%
for l=2:nlevels
nx(l) = nx(l-1)/2;
ny(l) = ny(l-1)/2;
end
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
x=h5read(file,strcat(mglevel,'/x'));
y=h5read(file,strcat(mglevel,'/y'));
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
figure
spy(mata)
%
% Solutions at the finest grid
%
sol_direct=h5read(file,'/solutions/direct');
sol_anal=h5read(file,'/solutions/anal');
vfine=h5read(file,'/solutions/vfine');
figure
subplot(211)
surf(x,y,sol_direct')
xlabel('X'); ylabel('Y');
title('Direct Solve on the finest grid')
subplot(212)
surf(x,y,vfine'-sol_direct')
xlabel('X'); ylabel('Y');
title('Prolongation solution on the finest grid')
%
% Errors
%
errdisc = h5read(file,'/errors/errdisc');
resid = h5read(file,'/errors/resid');
restrict = h5read(file,'/errors/restrict');
prolong = h5read(file,'/errors/prolong');
errdisc_prolong = h5read(file,'/errors/disc_err_prolong');
figure
subplot(211)
loglog(nx, errdisc, 'o-', nx(1:end-1), errdisc_prolong, 'h-')
grid on;
legend('Direct Solution', 'Prolonged Solution')
xlabel('N'); ylabel('Discretization Errors')
title(title_str);
subplot(212)
loglog(nx(2:end), restrict, 'o-', nx(1:end-1), prolong, 'h-')
grid on;
legend('Restricted RHS', 'Prolonged Solution')
xlabel('N'); ylabel('Discretization Errors')
diff --git a/matlab/test_transf2d_cyl.m b/matlab/test_transf2d_cyl.m
index 1466f02..63b4c00 100644
--- a/matlab/test_transf2d_cyl.m
+++ b/matlab/test_transf2d_cyl.m
@@ -1,117 +1,117 @@
%
% @file test_transf2d_cyl.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='test_transf2d_cyl.h5';
%
nx(1)=h5readatt(file,'/','NX');
ny(1)=h5readatt(file,'/','NY');
modem=h5readatt(file,'/','MODEM');
modep=h5readatt(file,'/','MODEP');
nidbas1=h5readatt(file,'/','NIDBAS1');
nidbas2=h5readatt(file,'/','NIDBAS2');
nlevels=h5readatt(file,'/','LEVELS');
title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d', ...
nx(1),ny(1),nidbas1,nidbas2,modem,modep);
%
% Grid sizes on each levels
%
for l=2:nlevels
nx(l) = nx(l-1)/2;
ny(l) = ny(l-1)/2;
end
%
% Prolongation matrices at the coarsest grid
%
levels=nlevels;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/matpx');
matpx=csr_mat(file,dset);
dset=strcat(mglevel,'/matpy');
matpy=csr_mat(file,dset);
%
% FE matrix at the finest grid
%
levels=1;
mglevel=sprintf('/mglevels/level.%.2d', levels);
dset=strcat(mglevel,'/mata');
[mata,diag]=csr_mat(file,dset);
x=h5read(file,strcat(mglevel,'/x'));
y=h5read(file,strcat(mglevel,'/y'));
f=h5read(file,strcat(mglevel,'/f'));
v=h5read(file,strcat(mglevel,'/v'));
f1d=h5read(file,strcat(mglevel,'/f1d'));
v1d=h5read(file,strcat(mglevel,'/v1d'));
% $$$ figure
% $$$ spy(mata)
%
% Solutions at the finest grid
%
sol_direct=h5read(file,'/solutions/direct');
sol_anal=h5read(file,'/solutions/anal');
vfine=h5read(file,'/solutions/vfine');
figure
subplot(211)
surf(x,y,sol_direct')
xlabel('X'); ylabel('Y');
title('Direct Solve on the finest grid')
subplot(212)
surf(x,y,vfine')
xlabel('X'); ylabel('Y');
title('Prolongation solution on the finest grid')
figure
subplot(311)
plot(x, sol_direct(:,ny(1)/2),x, vfine(:,ny(1)/2),'o')
xlabel('r');
grid on
legend('Direct Solution', 'Prolonged Solution')
title(title_str)
subplot(313)
plot(y, sol_direct(1,:),y, vfine(1,:),'o')
xlabel('\theta'); ylabel('On axis')
grid on
title(title_str)
subplot(312)
plot(y, sol_direct(nx(1)/2,:),y, vfine(nx(1)/2,:),'o')
xlabel('\theta'); ylabel('Off axis')
grid on
title(title_str)
%
% Errors
%
errdisc = h5read(file,'/errors/errdisc');
resid = h5read(file,'/errors/resid');
restrict = h5read(file,'/errors/restrict');
prolong = h5read(file,'/errors/prolong');
errdisc_prolong = h5read(file,'/errors/disc_err_prolong');
figure
loglog(nx, errdisc, 'o-', nx(2:end), errdisc_prolong, 'h-')
grid on;
legend('Direct Solution', 'Prolonged Solution')
xlabel('N'); ylabel('Discretization Errors')
title(title_str);
diff --git a/matlab/tpardiso.m b/matlab/tpardiso.m
index 5efb0a1..7609732 100644
--- a/matlab/tpardiso.m
+++ b/matlab/tpardiso.m
@@ -1,72 +1,72 @@
%
% @file tpardiso.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
mat='/MAT1';
gbmat;
clear S gb_mat;
% Diagonal balancing of matrix
dbal = 1./sqrt(diag(a));
a = diag(dbal)*a*diag(dbal);
file='tpardiso.h5';
mat='/MAT';
n=hdf5read(file,strcat(mat,'/N'));
nz=hdf5read(file,strcat(mat,'/NZ'));
irow=hdf5read(file,strcat(mat,'/irow'));
cols=hdf5read(file,strcat(mat,'/cols'));
val=hdf5read(file,strcat(mat,'/val'));
perm=hdf5read(file,strcat(mat,'/perm'));
amat=zeros(n,n);
% Check PARDISO mat
for i=1:n
for k=irow(i):irow(i+1)-1
j=cols(k);
amat(i,j) = val(k);
amat(j,i) = val(k);
end
end
err = a-amat;
errmx = max(max(abs(err)));
fprintf(1,'Max. error = %e\n', errmx);
figure
spy(sparse(amat(perm,perm)),'r.');
LABEL=sprintf('n = %d, nz =%d', n, nz);
title(LABEL)
% $$$ pmat=zeros(n);
% $$$ for i=1:n
% $$$ pmat(i,perm(i))=1;
% $$$ end
% $$$ amod=pmat*amat*pmat';
% $$$ S=sparse(amod);
% $$$ figure
% $$$ spy(S,'r.');
% $$$ LABEL=sprintf('n = %d', n);
% $$$ title(LABEL)
diff --git a/matlab/two_grid.m b/matlab/two_grid.m
index 3be72de..d6a6a0d 100644
--- a/matlab/two_grid.m
+++ b/matlab/two_grid.m
@@ -1,92 +1,92 @@
%
% @file two_grid.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
file='two_grid.h5';
%
nx=h5readatt(file,'/','NX');
nidbas=h5readatt(file,'/','NIDBAS');
levels=h5readatt(file,'/','LEVELS');
title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d', nx, nidbas, levels);
%
% Read matrices at coarset grid
%
for lev=2:levels
%
% FE mat at fine grid
mglevel=sprintf('/mglevels/level.%.2d', lev-1);
dset=strcat(mglevel,'/mata');
ku=h5readatt(file,dset,'KU'); kl=ku;
n=h5readatt(file,dset,'RANK');
gbmat=h5read(file,dset);
mata_f=zeros(n,n);
for i=1:n
jmin = max(1,i-kl);
jmax = min(n,i+ku);
for j=jmin:jmax
ib = kl+ku+i-j+1;
mata_f(i,j)=gbmat(ib,j);
end
end
dset=strcat(mglevel,'/f');
f_fine = h5read(file,dset);
dset=strcat(mglevel,'/v');
v_fine = h5read(file,dset);
%
% FE mat at coarse grid
mglevel=sprintf('/mglevels/level.%.2d', lev);
dset=strcat(mglevel,'/mata');
ku=h5readatt(file,dset,'KU'); kl=ku;
n=h5readatt(file,dset,'RANK');
gbmat=h5read(file,dset);
mata_c=zeros(n,n);
for i=1:n
jmin = max(1,i-kl);
jmax = min(n,i+ku);
for j=jmin:jmax
ib = kl+ku+i-j+1;
mata_c(i,j)=gbmat(ib,j);
end
end
dset=strcat(mglevel,'/f');
f_coarse = h5read(file,dset);
dset=strcat(mglevel,'/v');
v_coarse = h5read(file,dset);
%
% Prolong mat
dset=strcat(mglevel,'/matp');
matp=h5read(file,dset);
%
% Check
fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
- mata_c))
end
%
v_prolong = h5read(file,'/v_prolong');
figure
plot(v_fine)
hold on
plot(v_prolong, 'r')
diff --git a/matlab/zcsr_mat.m b/matlab/zcsr_mat.m
index 0b1b491..9411e3c 100644
--- a/matlab/zcsr_mat.m
+++ b/matlab/zcsr_mat.m
@@ -1,44 +1,44 @@
%
% @file zcsr_mat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [mata, diag] = zcsr_mat(file, dset)
n=hdf5read(file,dset, 'RANK');
nnz=hdf5read(file,dset, 'NNZ');
cols=hdf5read(file, strcat(dset,'/cols'));
irow=hdf5read(file, strcat(dset,'/irow'));
val=h5Complex_ll(file, strcat(dset,'/val'));
idiag=hdf5read(file, strcat(dset,'/idiag'));
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
cols=double(cols);
rows=double(rows);
mata = sparse(rows,cols,val);
if nargout == 2
diag = val(idiag);
end
diff --git a/matlab/zmumps_mat.m b/matlab/zmumps_mat.m
index 2db15b7..4558886 100644
--- a/matlab/zmumps_mat.m
+++ b/matlab/zmumps_mat.m
@@ -1,44 +1,44 @@
%
% @file zmumps_mat.m
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
function [mata, diag] = zmumps_mat(file, dset)
n=hdf5read(file,dset, 'RANK');
Nnz=hdf5read(file,dset, 'NNZ');
cols=hdf5read(file, strcat(dset,'/cols'));
irow=hdf5read(file, strcat(dset,'/irow'));
irn=hdf5read(file, strcat(dset,'/mumps_par/IRN'));
val=h5Complex_ll(file, strcat(dset,'/val'));
idiag=int32(find((irn-cols)==0));
for i=1:n
s = irow(i);
e = irow(i+1)-1;
rows(s:e) = i;
end
cols=double(cols);
rows=double(rows);
mata = sparse(rows,cols,val);
if nargout == 2
diag = val(idiag);
end
diff --git a/multigrid/CMakeLists.txt b/multigrid/CMakeLists.txt
index 27779d3..b99a423 100644
--- a/multigrid/CMakeLists.txt
+++ b/multigrid/CMakeLists.txt
@@ -1,32 +1,32 @@
#
# @file CMakeLists.txt
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
# @author Trach-Minh Tran
#
add_subdirectory(src)
if(BSPLINES_EXAMPLES)
add_subdirectory(wk)
endif()
#add_subdirectory(halpern)
diff --git a/multigrid/docs/Makefile b/multigrid/docs/Makefile
index e144969..d18fa33 100644
--- a/multigrid/docs/Makefile
+++ b/multigrid/docs/Makefile
@@ -1,52 +1,52 @@
#
# @file Makefile
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
all: multigrid.pdf multigrid_2d.pdf mg_gbs.pdf
grid.eps: grid.tex
tex grid.tex
dvips -o grid.eps grid.dvi
mg_gbs.dvi: grid.eps
clean:
rm -f grid.eps *.dvi *.log *.aux *~ *.toc *.flc *.bbl *.blg *.out *~
distclean: clean
.SUFFIXES:
.SUFFIXES: .tex .dvi .pdf
%.dvi: %.tex
latex $<
@while ( grep "Rerun to get cross-references" \
${<:tex=log} > /dev/null ); do \
latex $<; \
done
latex $<
%.pdf: %.dvi
dvipdf $<
diff --git a/multigrid/docs/grid.tex b/multigrid/docs/grid.tex
index 13b90a0..40091d9 100644
--- a/multigrid/docs/grid.tex
+++ b/multigrid/docs/grid.tex
@@ -1,70 +1,70 @@
%
% @file grid.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\hoffset=-2truecm
\voffset=-2truecm
\special{papersize=9cm,5cm}
\hsize=9truecm
\vsize=5truecm
\parindent=0pt
\nopagenumbers
\input pstricks
\pspicture(4,4)
\psline[linestyle=solid](0,0)(8,0)
\psline[linestyle=dotted](0,1)(8,1)
\psline[linestyle=solid](0,2)(8,2)
\psline[linestyle=dotted](0,3)(8,3)
\psline[linestyle=solid](0,4)(8,4)
\psline[linestyle=solid](0,0)(0,4)
\psline[linestyle=dotted](1,0)(1,4)
\psline[linestyle=solid](2,0)(2,4)
\psline[linestyle=dotted](3,0)(3,4)
\psline[linestyle=solid](4,0)(4,4)
\psline[linestyle=dotted](5,0)(5,4)
\psline[linestyle=solid](6,0)(6,4)
\psline[linestyle=dotted](7,0)(7,4)
\psline[linestyle=solid](8,0)(8,4)
\psdots[dotstyle=square,dotscale=2](0,0)(2,0)(4,0)(6,0)(8,0)
\psdots[dotstyle=square,dotscale=2](0,2)(2,2)(4,2)(6,2)(8,2)
\psdots[dotstyle=square,dotscale=2](0,4)(2,4)(4,4)(6,4)(8,4)
\psdots[dotstyle=*](0,0)(1,0)(2,0)(3,0)(4,0)(5,0)(6,0)(7,0)(8,0)
\psdots[dotstyle=*](0,1)(1,1)(2,1)(3,1)(4,1)(5,1)(6,1)(7,1)(8,1)
\psdots[dotstyle=*](0,2)(1,2)(2,2)(3,2)(4,2)(5,2)(6,2)(7,2)(8,2)
\psdots[dotstyle=*](0,3)(1,3)(2,3)(3,3)(4,3)(5,3)(6,3)(7,3)(8,3)
\psdots[dotstyle=*](0,4)(1,4)(2,4)(3,4)(4,4)(5,4)(6,4)(7,4)(8,4)
\endpspicture
\bye
diff --git a/multigrid/docs/mg_gbs.tex b/multigrid/docs/mg_gbs.tex
index 4c1eec0..4fff2b6 100644
--- a/multigrid/docs/mg_gbs.tex
+++ b/multigrid/docs/mg_gbs.tex
@@ -1,1623 +1,1623 @@
%
% @file mg_gbs.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
\usepackage{graphicx}
\usepackage{hyperref}
%\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{placeins}
\usepackage{multirow}
\usepackage{latexsym}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{rotating}
\def\RepFigures{FIGURES_mg_gbs}
\title{\tt Multigrid Solver for GBS}
\author{Trach-Minh Tran, Federico Halpern\\CRPP/EPFL}
\date{v1.0, June 2015}
\begin{document}
\maketitle
\tableofcontents
\section{The PDE}
The PDE considered is
\begin{equation}
\label{eq:pde}
\left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial
x\partial y} + \frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y) =
f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y.
\end{equation}
On the four boundaries, homogeneous Dirichlet boundary condition
$u=0$ as well as Neumann boundary condition
$\partial u/\partial n=0$ can be applied.
\section{Discretization}
The grid points $(x_i,y_j)$ are defined by
\begin{equation}
\begin{split}
x_i &= ih_x = i\frac{L_x}{N_x}, \quad i=0,\ldots, N_x \\
y_j &= jh_y = j\frac{L_y}{N_y}, \quad j=0,\ldots, N_y \\
\end{split}
\end{equation}
Second order Finite Difference discretization of Eq.\ref{eq:pde} leads
to the following 9-point stencil
\begin{equation}
\label{eq:stencil}
S_{ij} = \frac{1}{h_x^2}
\begin{bmatrix}
-\tau\alpha/4 & \alpha^2 & \tau\alpha/4 \\
1 & -2(1+\alpha^2)-h_x^2a_{ij} & 1 \\
\tau\alpha/4 & \alpha^2 &-\tau\alpha/4 \\
\end{bmatrix}
, \qquad \mbox{where $\alpha=h_x/h_y$}.
\end{equation}
Note that the mesh aspect ratio $\alpha$ results in the same stencil
for the \emph{anisotropic} Poisson equation with $h_x=h_y$:
\begin{equation}
\frac{\partial^2 u}{\partial x^2} + \alpha^2
\frac{\partial^2 u}{\partial y^2} = f.
\end{equation}
It is shown in \cite[p.~119]{Briggs} that this anisotropy can degrade
the performance of multigrid using standard relaxations such as
Gauss-Seidel or damped Jacobi can be strongly degraded.
With the \emph{lexicographic} numbering
\begin{equation}
I = j(N_x+1) + i+1,
\end{equation}
for the $(N_x+1)(N_y+1)$ nodes, the discretized problem can be
expressed as a matrix problem
\begin{equation}
\label{eq:matrix}
\mathbf{Au} = \mathbf{f},
\end{equation}
where $\mathbf{A}$ is a 9-diagonal matrix, assembled using the stencil defined
above. Homogeneous Dirichlet boundary condition can be imposed, for example, on
the face $j=0$ simply by \emph{clearing} the matrix rows and columns
$1,2,\ldots, N_x+1$, and setting the diagonal terms to 1.
Neumann boundary condition $\partial u/\partial x=0$ at
the face $i=0$, can be simply implemented by imposing
$u_{-1j}=u_{1j}$. The stencil for the boundary nodes $(0,j)$
can thus be modified as
\begin{equation}
S_{0j} = \frac{1}{h_x^2}
\begin{bmatrix}
0 &\alpha^2 & 0 \\
0 &-2(1+\alpha^2)-h_x^2a_{0j} & 2 \\
0 &\alpha^2 & 0 \\
\end{bmatrix}
.
\end{equation}
Two model problems are considered in this report:
\begin{description}
\item[\texttt{\textbf{DDDD}} problem:] Homogeneous Dirichlet BC
at all the 4 boundaries. The \emph{analytic solution} is
\begin{equation}
u(x,y) = \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
\qquad \mbox{where $k_x$, $k_y$ are positive integers}.
\end{equation}
\item[\texttt{\textbf{NNDD}} problem:] Neumann boundary conditions
$\partial u/\partial x=0$ at $x=0$ and $x=L_x$, homogeneous
Dirichlet BC at $y=0$ and $y=L_y$. The \emph{analytic solution} is
\begin{equation}
u(x,y) = \cos\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
\qquad \mbox{where $k_x$, $k_y$ are positive integers}.
\end{equation}
\end{description}
In both problems, $a$ depends only on $x$:
\begin{equation}
\label{eq:density}
a(x,y)= \exp\left[-\frac{(x-L_x/3)^2}{(L_x/2)^2}\right].
\end{equation}
The sparse direct solver MUMPS \cite{MUMPS} is used to solve (\ref{eq:matrix}) in
order to check the convergence of the schema described
above. Fig.\ref{fig:convergence} shows the expected \emph{quadratic}
convergence of the error with respect to $h_x$ with fixed $\alpha=h_x/h_y=0.5$ for
both problems, when the grid size is varied from $16\times 64$ to $512\times 2048$.
\begin{figure}[hbt]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/convergence}
\caption{Convergence of the error
$\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals
in the $x$ direction $N_x$ for $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.}
\label{fig:convergence}
\end{figure}
\section{Multigrid $V$-cycle}
\label{sec-mgProc}
Given an approximate $\mathbf{u}^h$ and right hand side $\mathbf{f}^h$
defined at some grid level represented by the grid spacing $h$, the
following MG $V$-cycle procedure
\begin{equation*}
\boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{f}^h)}
\end{equation*}
will compute a \emph{new} $\mathbf{u}^h$. It is defined recursively by the
following steps:
\begin{enumerate}
\item If $h$ is the coarsest mesh size,
\begin{itemize}
\item Direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{f}^h$
\item Goto 3.
\end{itemize}
\item Else
\begin{itemize}
\item Relax $\mathbf{u}^h$ $\nu_1$ times.
\item $\mathbf{f}^{2h} \leftarrow {\mathbf{R}}(\mathbf{f}^h-\mathbf{A}^h\mathbf{u}^h)$.
\item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{f}^{2h})$ $\mu$ times.
\item $\mathbf{u}^h\leftarrow
\mathbf{u}^h+{\mathbf{P}}\mathbf{u}^{2h}$.
\item Relax $\mathbf{u}^h$ $\nu_2$ times.
\end{itemize}
\item Return
\end{enumerate}
In the procedure above, the operators $\mathbf{R}$ and $\mathbf{P}$
denote respectively the \emph{restriction} (from \emph{fine} to
\emph{coarse} grid) and the \emph{prolongation} (from \emph{coarse} to
\emph{fine} grid). Notice that in this multigrid procedure,
$\mathbf{R}$ applies only to the \emph{right hand side} while $\mathbf{P}$
applies only to the \emph{solution}. The standard $V(\nu_1,\nu_2)$ cycle is obtained by
calling this $MG^h$ procedure with $\mathbf{f}^h$ defined at the
\emph{finest} grid level, a guess $\mathbf{u}^h=0$ and $\mu=1$, while
$\mu=2$ results in the $W(\nu_1,\nu_2)$ cycle.
Details on the grid coarsening, the inter-grid transfers and methods
of relaxation are given in the following.
\subsection{Grid coarsening}
Let start with the one-dimensional \emph{fine} grid defined
by $x_i,\, i=0,\ldots,N$, assuming that $N$ is even. The next coarse grid
(with $N/2$ intervals) is obtained by simply discarding the grid
points with \emph{odd} indices.
In order to get a \emph{smallest coarsest} grid (so that it is possible
to solve \emph{cheaply} the problem with a \emph{direct} method), $N$ should be
$N=N_c2^{L-1}$ where $L$ the total number of grid levels and $N_c$ is either
2 or a \emph{small odd} integer. As an example, the fine grid with $N=768$ can have
up to 9 grid levels, and a coarsest grid with 3 intervals, see
Table~\ref{tab:level}.
\begin{table}[hbt]
\centering
\begin{tabular}{|l||r|r|r|r|r|}\hline
$L$ & \multicolumn{5}{c|}{$N$} \\ \hline
1 & 2 & 3 & 5 & 7 & 9\\
2 & 4 & 6 & 10 & 14 & 18\\
3 & 8 & 12 & 20 & 28 & 36\\
4 & 16 & 24 & 40 & 56 & 72\\
5 & 32 & 48 & 80 & 112 & 144\\
6 & 64 & 96 & 160 & 224 & 288\\
7 & 128 & 192 & 320 & 448 & 576\\
8 & 256 & 384 & 640 & 896 & 1152\\
9 & 512 & 768 & 1280 & 1792 & 2304\\
10 & 1024 & 1536 & 2560 & 3584 & 4608\\
\hline
\end{tabular}
\caption{Set of values of the \emph{fine} grid number of intervals $N$
to obtain a \emph{coarsest} grid size at most equal to $9$ with
at most $10$ grid levels.}
\label{tab:level}
\end{table}
For a two-dimensional grid, the same procedure is applied to both
dimensions. The result of such procedure is illustrated in
Fig.~\ref{fig:2d_coarsening}, for a $8\times4$ fine grid.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.6\hsize]{grid}
\caption{A \emph{coarse} $4\times 2$ grid ($\Box$) obtained from a
$8\times4$ fine grid ($\bullet$).}
\label{fig:2d_coarsening}
\end{figure}
\subsection{Inter-grid transfers}
The one-dimensional \emph{prolongation} operator for the second-order FD
discretization is chosen the same as the one obtained with the
\emph{linear Finite Elements} \cite{MG1D}. For a $N=8$ grid, it can be
represented as a $9\times 5$ matrix given by
\begin{equation}
\label{eq:1dprolongation}
\mathbf{P} =
\left(
\begin{matrix}
1 & 0 & 0 & 0 & 0 \\
1/2 & 1/2 & 0 & 0 & 0 \\
0 & 1 & 0 & 0 & 0 \\
0 & 1/2 & 1/2 & 0 & 0 \\
0 & 0 & 1 & 0 & 0 \\
0 & 0 & 1/2 & 1/2 & 0 \\
0 & 0 & 0 & 1 & 0 \\
0 & 0 & 0 & 1/2 & 1/2\\
0 & 0 & 0 & 0 & 1 \\
\end{matrix}\right)
\end{equation}
The \emph{restriction} matrix $\mathbf{R}$ is simply related to $\mathbf{P}$ by
\begin{equation}
\label{eq:1drestriction}
\mathbf{R} = \frac{1}{2}\mathbf{P}^{T}=\frac{1}{2}\left(
\begin{matrix}
1&1/2&0&0&0&0&0&0&0\\
0&1/2&1&1/2&0&0&0&0&0\\
0&0&0&1/2&1&1/2&0&0&0\\
0&0&0&0&0&1/2&1&1/2&0\\
0&0&0&0&0&0&0&1/2&1\\
\end{matrix}
\right).
\end{equation}
For Dirichlet BC imposed on the \emph{left} boundary one has to set
${P}_{21}=R_{12}=0$, while for Dirichlet BC imposed on the \emph{right}
boundary, ${P}_{N,N/2+1}=R_{N/2+1,N}=0$. Notice that these inter-grid operators are
identical to the standard \emph{linear interpolation} and \emph{full
weighting} operators.
For a two-dimensional problem, using the property that the grid is a
\emph{tensor product} of two one-dimensional grids, the
restriction of the right hand side $f^{h}_{ij}$ and the prolongation of
the solution $u^{2h}_{ij}$ can be computed as
\begin{equation}
\label{eq:2dintergrid}
\begin{split}
\mathbf{f}^{2h} &= \mathbf{R}^x \cdot \mathbf{f}^{h}\cdot (\mathbf{R}^y)^T \\
\mathbf{u}^{h} &= \mathbf{\mathbf{P}}^x \cdot \mathbf{u}^{2h}\cdot
(\mathbf{\mathbf{P}}^y)^T \\
\end{split}
\end{equation}
\subsection{Relaxations}
Gauss-Seidel and damped Jacobi iterations are used as relaxation
methods in the multigrid $V$ cycle. In general, Gauss-Seidel
method is more efficient but much more difficult to
\emph{parallelize} than the Jacobi method.
It should be noted that if $a(x,y)$ in Eq.~\ref{eq:pde} is
non-positive, both relaxations diverge! This can be seen by
considering the following one-dimensional FD equation with uniform
$a$:
\begin{equation}
u_{j-1} -(2+ah^2)u_j + u_{j+1} = h^2f_j.
\end{equation}
Using the damped Jacobi relaxation, the error
$\epsilon^{(m)}_j\equiv u_{anal}(x_j)-u_j^{(m)}$ at iteration $m+1$ is given by
\begin{equation}
\epsilon^{(m+1)}_j =
\frac{\omega}{2+h^2a}(\epsilon^{(m)}_{j-1}+\epsilon^{(m)}_{j+1})
+(1-\omega)\epsilon^{(m)}_j .
\end{equation}
Performing a \emph{local mode analysis} (or Fourier analysis) (see
\cite[p.~48]{Briggs}), assuming that
$\epsilon^{(m)}_j=A(m)e^{ij\theta}$, where $\theta$ is related to the
mode number $k$ by $\theta=2\pi k/N$, the
\emph{amplification factor} $G(\theta)$ is obtained as
\begin{equation}
\begin{split}
G(\theta) &= \frac{A(m+1)}{A(m)} =
\frac{2\omega}{2+h^2a}\cos\theta + (1-\omega) \\
&= G_0(\theta) -\frac{ \omega h^2a}{2+h^2a}\cos\theta
\simeq G_0(\theta) -\frac{ \omega h^2a}{2}\cos\theta, \\
G_0(\theta) &=1-2\omega\sin^2\frac{\theta}{2}, \\
\end{split}
\end{equation}
where $G_0(\theta)$ is the amplification factor for
$a=0$. Note that $|G_0(\theta)|< 1$ for \emph{all} $\theta$ and $0<\omega< 1$
but $\displaystyle{\max_{|\theta|<\pi}|G(\theta)|>1}$ if $a<0$.
In Gauss-Seidel relaxation method, the errors evolve as:
\begin{equation}
\epsilon^{(m+1)}_j = \frac{\epsilon^{(m+1)}_{j-1}+\epsilon^{(m)}_{j+1}}{2+h^2a}.
\end{equation}
Applying again the same Fourier analysis yields the
following complex amplification factor:
\begin{equation}
\begin{split}
G(\theta) &\simeq G_0(\theta)\left(1-\frac{h^2a}{2-e^{-i\theta}}\right) \\
G_0(\theta) &=\frac{e^{i\theta}}{2-e^{-i\theta}}, \quad
|G_0(\theta)| < 1
\end{split}
\end{equation}
which show that the Gauss-Seidel relaxations \emph{diverge} if $a<0$.
Notice that when $a>0$, the effect of $a$ on the amplification is
negligible and is thus ignored in the following two-dimensional
analysis. Applying the damped Jacobi scheme on the
stencil~(\ref{eq:stencil}), the error at the iteration $m+1$ is given
by:
\begin{equation}
\begin{split}
\epsilon^{(m+1)}_{ij} = & \frac{\omega}{2(1+\alpha^2)} \left[
\epsilon^{(m)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2(
\epsilon^{(m)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}(
\epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m)}_{i-1,j-1} -
\epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m)}_{i+1,j-1})
\right] \\
& + (1-\omega)\epsilon^{(m)}_{ij}. \\
\end{split}
\end{equation}
Using the two-dimensional Fourier mode expression
\begin{equation}
\epsilon^{(m)}_{ij} = A(m)e^{i(\theta_1+\theta_2)}, \quad -\pi <\theta_1,
\theta_2 \le \pi,
\end{equation}
the amplification factor $G=A(m+1)/A(m)$ is given
by
\begin{equation}
\label{eq:amp_jac}
G(\theta_1,\theta_2;\omega,\alpha,\tau)=1 - \frac{2\omega}{1+\alpha^2} \left(
\sin^2\frac{\theta_1}{2} + \alpha^2\sin^2\frac{\theta_2}{2} +
\frac{\tau\alpha}{4}\sin\theta_1\,\sin\theta_2
\right).
\end{equation}
The errors in Gauss-Seidel method, assuming a \emph{lexicographic} ordering
for the unknowns (increasing first $i$ then $j$), are updated according
to
\begin{equation}
\epsilon^{(m+1)}_{ij} = \frac{1}{2(1+\alpha^2)} \left[
\epsilon^{(m+1)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2(
\epsilon^{(m+1)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}(
\epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m+1)}_{i-1,j-1} -
\epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m+1)}_{i+1,j-1})
\right].
\end{equation}
The Fourier mode analysis then leads to the following complex
amplification factor
\begin{equation}
\label{eq:amp_gs}
G(\theta_1,\theta_2;\alpha,\tau) =
\frac{e^{i\theta_1} +
\left(\alpha^2+i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{i\theta_2}}
{2(1+\alpha^2) - \left[e^{-i\theta_1}+\left(\alpha^2 -
i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{-i\theta_2}\right]}.
\end{equation}
Curves of $G$ for \emph{fixed} $\theta_2$ are plotted in
Fig.~\ref{fig:fourier_jac} showing \emph{convergence} ($\max|G|<1$) for
$\tau=-1,0,1,2$, using the damped Jacobi method. The
same conclusions are obtained for Gauss-Seidel relaxations as shown
in Fig.~\ref{fig:fourier_gs} where the absolute values
of the complex amplification factor $G$ are plotted. However, for larger
$|\tau|>2$, both methods diverge as can be seen in
Fig.~\ref{fig:relax_diverge}.Notice however that the PDE
(\ref{eq:pde}) is \emph{elliptic} only when $|\tau|<2$ is satisfied!
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.85\hsize]{\RepFigures/fourier_jac}
\caption{Amplification factor for damped Jacobi relaxations with
$\omega=0.8$ and $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$ displayed as curves of
constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
the \emph{red} curve.}
\label{fig:fourier_jac}
\end{figure}
\begin{figure}[hbt]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/fourier_gs}
\caption{Absolute value of the amplification factor for Gauss-Seidel
relaxations with $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$, displayed as curves of
constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
the \emph{red} curve.}
\label{fig:fourier_gs}
\end{figure}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/relax_diverge}
\caption{Amplification factor for Jacobi (left) and Gauss-Seidel (right)
relaxations for $|\tau|=3,5$, $\alpha=h_x/h_y=1$, displayed as curves of
constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
the \emph{red} curve.}
\label{fig:relax_diverge}
\end{figure}
In summary, the local mode analysis predicts that
\begin{itemize}
\item Negative values of the coefficient $a$ and large
mixed derivative ($|\tau| > 2$) can make both damped Jacobi and
Gauss-Seidel relaxations diverge.
\item Positive values of $a$ can decrease the amplification factor
(improving thus the convergence rate) but
its contributions $h^2a$ decrease for increasing grid resolution.
\end{itemize}
These predictions will be checked against numerical experiments in the
next section.
\FloatBarrier
\section{Numerical Experiments}
\label{sec:NumExp1}
In the following numerical experiments, we look at the convergence
rate of the residual norm and the error norm which are defined at the
iteration $m$ by
\begin{equation}
\begin{split}
r^{(m)} &= \|\mathbf{f}-\mathbf{A}\mathbf{u}^{(m)}\|_\infty, \\
e^{(m)} &= \|\mathbf{u}^{(m)}-\mathbf{u}_{anal}\|_\infty.\\
\end{split}
\end{equation}
The iterations are stopped when the number of iterations reach an
user supplied \emph{maximum} of iterations or when the residual norm
is smaller than either a given \emph{relative} tolerance {\tt rtol}
\cite[p.~51]{TEMPL} or \emph{absolute} tolerance {\tt atol}:
\begin{equation}
\begin{split}
r^{(m)} &< \mbox{\tt rtol}
\cdot(\|\mathbf{A}\|_\infty\cdot\|\mathbf{u}^{(m)}\|_\infty +
\|\mathbf{f}\|_\infty), \\
r^{(m)} &< \mbox{\tt atol}.\\
\end{split}
\end{equation}
An additional stopping criteria consists of stopping the
iterations when the change of the discretization error between
successive iteration is small enough:
\begin{equation}
\frac{e^{(m)}-e^{(m-1)}}{e^{(m-1)}}< \mbox{\tt etol}.
\end{equation}
\subsection{$V$-cycle performances}
Table~\ref{tab:iternum} shows the numbers of $V$-cycles required to
reach the \emph{relative tolerance}
$\mbox{\tt rtol}=10^{-8}$. In these runs where $\alpha=0.5$, $\tau=1$
and $a(x,y)$ given by Eq.~\ref{eq:density}, we observe that the
biggest improvement is obtained at $\nu_1=\nu_2=2$. For larger
$\nu_1,\nu_2$, the number of required iterations is relatively insensitive
to the grid sizes. As can be seen in Fig.~\ref{fig:mg_iterations}
which plots the evolution of the error $e^{(m)}$, it
is clear that the level of discretization error has been largely
reached. Finally the times used by these runs are shown in
Fig.~\ref{fig:dddd_pc220} and Fig.~\ref{fig:nndd_pc220} where the
times spent in the direct solver using MUMPS \cite{MUMPS} are included
for comparison. For the $512\times 2048$ grid (the largest
case using the direct solver), the multigrid $V(3,3)$ is about $30$
times faster!
\begin{table}[htb]
\centering
\begin{tabular}{|l||c|c|c|c||c|c|c|c|}\hline
& \multicolumn{4}{c||}{\texttt{\textbf{DDDD}} problem}
& \multicolumn{4}{c|}{\texttt{\textbf{NNDD}} problem} \\ \cline{2-9}
Grid size & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$
& $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ \\ \hline
$16\times 64$ & 3 & 2 & 2 & 1 & 4 & 2 & 2 & 1\\
$32\times 128$ & 5 & 3 & 2 & 2 & 5 & 3 & 2 & 2\\
$64\times 256$ & 7 & 4 & 3 & 3 & 7 & 4 & 3 & 3\\
$128\times 512$ & 10 & 6 & 4 & 4 & 10 & 6 & 5 & 4\\
$256\times 1024$ & 11 & 6 & 5 & 4 & 11 & 6 & 5 & 4\\
$512\times 2048$ & 11 & 6 & 5 & 4 & 11 & 6 & 5 & 4\\
$1024\times 4096$ & 10 & 6 & 4 & 4 & 10 & 6 & 4 & 4\\
$1536\times 6144$ & 9 & 6 & 4 & 4 & 9 & 5 & 4 & 3\\
\hline
\end{tabular}
\caption{Multigrid $V$-cycle results for the {\tt DDDD} and {\tt NNDD}
model problems with $k_x=k_y=4$, $L_x=100$, $L_y=800$, $\tau=1$ and
$a(x,y)$ given by Eq.~\ref{eq:density}. Shown are the numbers of
multigrid $V$-cycles required to reduce the \emph{relative}
residual norm to less than $10^{-8}$ for different
grid sizes and numbers of pre and post relaxation
sweeps. Gauss-Seidel relaxation is used. The coarsest grid size of the
$1536\times 6144$ case is $3\times 12$ while all the others have a coarsest
grid of size $2\times 8$.}
\label{tab:iternum}
\end{table}
\begin{figure}[htb!]
\centering
\includegraphics[width=\textwidth]{\RepFigures/mg_iterations}
\caption{Performance of the $V(2,2)$-cycle using the Gauss-Seidel
relaxation scheme for the {\tt DDDD} (upper curve) and {\tt NNDD}
(lower curve) problem. The relative tolerance {\tt rtol} is
set to $10^{-8}$ the coarsest grid size for all the problem size
is fixed to $2\times 8$.}
\label{fig:mg_iterations}
\end{figure}
The fittings of the obtained data show that the multigrid
$V$ cycle cost scales almost \emph{linearly} with the number of
unknowns $N=(N_x+1)(N_y+1)$ (as does the backsolve stage of MUMPS)
while the \emph{total} direct solve time scales as $N^{1.4}$.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/dddd_pc220}
\caption{Times used by the multigrid $V$ cycles for the runs reported
in Table~\ref{tab:iternum} for the \texttt{\textbf{DDDD}}
problem. The last 6 $V(3,3)$ data points are used for the multigrid fit. The
MUMPS direct solver's cost is included for comparison. All the
timing results are obtained on an Intel Nehalem i7 processor, using the
Intel compiler version 13.0.1 and MUMPS-4.10.0. }
\label{fig:dddd_pc220}
\end{figure}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_pc220}
\caption{As in Fig.~\ref{fig:dddd_pc220} for the
\texttt{\textbf{NNDD}} problem.}
\label{fig:nndd_pc220}
\end{figure}
\FloatBarrier
\subsection{Effects of the mesh aspect ratio $\alpha$}
From Table~\ref{tab:anisotropy}, one can observe that the required number of
$V(2,2)$ cycles increase quickly when $\alpha<0.5$ and $\alpha>2$. Advanced
\emph{relaxation} methods and \emph{coarsening} strategies
\cite[chap. 7]{Wesseling} can solve this performance degradation
but are generally more difficult to parallelize.
\begin{table}[htb]
\centering
\begin{tabular}{|l||c|c|c|}\hline
$\alpha$ & \texttt{\textbf{DDDD}} & \texttt{\textbf{NNDD}} \\ \hline
0.125 & 19 & 22 \\
0.25 & 12 & 12 \\
0.5 & 6 & 6 \\
1.0 & 5 & 5 \\
2.0 & 7 & 7 \\
4.0 & 20 & 19 \\
\hline
\end{tabular}
\caption{Effects of the \emph{mesh aspect ratio} $\alpha=h_x/h_y$ on the
number of $V(2,2)$ cycles required to reach $\mbox{\tt
rtol}=10^{-8}$ for {\tt DDDD} and {\tt NNDD}
model problems. The listed $\alpha$'s are obtained by fixing
$N_x=256$, $N_y=1024$, $L_x=100$ and varying $L_y$.}
\label{tab:anisotropy}
\end{table}
\subsection{Effects of the mixed partial derivative}
When $|\tau|>2$, Table~\ref{tab:mixedterm} shows that the multigrid
$V$-cycle diverge, as predicted from the local mode analysis based on
the amplification factor given in Eq.~\ref{eq:amp_gs}. Although, a
non-negative coefficient $a$ has a \emph{stabilizing} effect, the
latter disappears already for a $256\times 1024$ grid.
\begin{table}[htb]
\centering
\begin{tabular}{|l|l|c|c|c|c|c|c|c|c|}\hline
&Grid size & $\tau=-3$ & $\tau=-2$ & $\tau=-1$ & $\tau=0$ & $\tau=1$
& $\tau=2$ & $\tau=3$ \\ \hline
\multirow{4}{*}{\texttt{\textbf{DDDD}}}
&$128\times 512(a=0)$ & - & 39 & 7 & 5 & 7 & 38 & - \\
& $128\times 512$ &16 & 6 & 5 & 4 & 4 & 6 & 17 \\
& $256\times 1024$ &- & 8 & 5 & 4 & 5 & 7 & - \\
& $512\times 2048$ &- & 9 & 5 & 4 & 5 & 9 & - \\
\hline\hline
\multirow{4}{*}{\texttt{\textbf{NNDD}}}
&$128\times 512(a=0)$ & - & 42 & 7 & 5 & 7 & 41 & - \\
& $128\times 512$ &13 & 6 & 5 & 4 & 5 & 5 & 13 \\
& $256\times 1024$ &- & 7 & 5 & 4 & 5 & 7 & - \\
& $512\times 2048$ &- & 7 & 5 & 4 & 5 & 7 & - \\
\hline
\end{tabular}
\caption{Effects of the mixed derivative term $\tau$ on the
performances of the $V(3,3)$ cycle. The dashes
indicate that the $V$-cycle diverges. In theses runs, $a(x,y)$ is given by
Eq.~\ref{eq:density} except for the cases where it is set to
0. Notice the \emph{stabilizing} effect of $a\ne 0$ for the
$128\times 512$ grid at $\tau = \pm 3$.}
\label{tab:mixedterm}
\end{table}
\subsection{Using the damped Jacobi relaxation}
The optimum Jacobi damping factor $\omega$ can be determined by minimizing
the \emph{smoothing factor} defined as the maximum amplification
coefficient (\ref{eq:amp_jac}) restricted to the \emph{oscillatory modes}:
\begin{equation}
\label{eq:mu_jac}
\mu(\omega,\alpha,\tau) = \max_{(\theta_1,\theta_2)\in\Omega}
|G(\theta_1,\theta_2,\omega,\alpha,\tau)|, \qquad \Omega =
[|\theta_1|>\pi/2]\,\bigcup\,[|\theta_2|>\pi/2].
\end{equation}
Results from numerical computation of (\ref{eq:mu_jac} are shown
in Fig.~\ref{fig:jac_opt}. An analytic expression for $\tau=0$
assuming $\alpha\le 1$ is derived in \cite[p.~119]{salpha}:
\begin{gather}
\mu(\omega,\alpha,\tau=0) =
\max\left(\left|1-2\omega\right|,\;\left|1-\frac{\alpha^2}{1+\alpha^2}\omega\right|\right),
\nonumber\\
\mu_{\mbox{opt}} = \frac{2+\alpha^2}{2+3\alpha^2} \quad
\mbox{at} \quad \omega_{\mbox{opt}} = \frac{2+2\alpha^2}{2+3\alpha^2}.
\end{gather}
Notice that the smoothing factor increases as $\alpha$ departs from 1
and for increasing $\tau$.
For Gauss-Seidel relaxation, the same numerical procedure applied to
(\ref{eq:amp_gs}) yields a smoothing factor $\mu$ equal to
respectively $0.5$, $ 0.68$ and $0.70$ for the three cases shown in
Fig.~\ref{fig:jac_opt}, which result in a better smoothing property
than the damped Jacobi relaxation.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/jac_opt}
\caption{The smoothing factor for damped Jacobi relaxation
for different values of $\alpha$ and $\tau$.}
\label{fig:jac_opt}
\end{figure}
Numerical experiments with the reference case ($\alpha=0.5$, $\tau=1$,
$a(x,y)$ given by Eq.~\ref{eq:density}) and the $128\times 512$ grid
using damped Jacobi relaxation, are shown in Table~\ref{tab:jac_opt}
and confirm that $\omega=0.9$ is the optimum damping factor and that
it is less efficient than the Gauss-Seidel relaxation, in agreement
with the Fourier analysis.
\begin{table}[htb]
\centering
\begin{tabular}{l c c c c c c}\hline
&$\omega=0.5$ &$\omega=0.6$ &$\omega=0.7$ &$\omega=0.8$ &$\omega=0.9$
&$\omega=1.0$ \\ \cline{2-7}
\texttt{\textbf{DDDD}}& 12 & 10 & 9 & 8 & 7 & 15 \\
\texttt{\textbf{NNDD}}& 12 & 11 & 9 & 8 & 7 & 18 \\
\hline
\end{tabular}
\caption{The number of $V(3,3)$ cycles required to obtain
$\mbox{\texttt{rtol}}=10^{-8}$ versus the Jacobi \emph{damped
factor} $\omega$. The grid size is $128\times 512$ with
$\alpha=0.5$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}.}
\label{tab:jac_opt}
\end{table}
\subsection{Matrix storage}
Initially the \emph{Compressed Sparse Row} storage format (CSR or CRS) (see
\cite[p.~58--59]{TEMPL}) was used to store the discretized finite
difference matrix. With this choice, the CPU time used by the matrix
construction (and boundary condition setting) is found to be always larger
than the multigrid solver time as shown in
Fig.~\ref{fig:matcon_time}. Fortunately, switching to the \emph{Compressed
Diagonal Storage} (CDS), where the 9 diagonal structure of the
matrix is fully exploited, the matrix construction time is considerably
reduced as shown in the same figure. On the other hand, no
difference in the multigrid solver performance is noticeable
between the two matrix storage.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/matcon_time}
\caption{CPU time used by the matrix construction for CSR and CDS
matrix storage compared to the multigrid $V(3,3)$ cycle time for
the \textbf{DDDD} and \textbf{NNDD} model problems. The timing is
obtained using the same conditions as in Fig.~\ref{fig:dddd_pc220}.}
\label{fig:matcon_time}
\end{figure}
\FloatBarrier
\section{Modified PDE}
Here, the following modified PDE is considered:
\begin{equation}
\label{eq:new_pde}
\left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial
x\partial y} + (1+\tau^2/4)\frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y) =
f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y.
\end{equation}
This PDE which is obtained from the $\hat s-\alpha$ model
\cite[Eq.10]{salpha} is
\emph{elliptic} for any value of $\tau$. The resulting stencil is
changed from the previous stencil \ref{eq:stencil} to
\begin{equation}
\label{eq:new_stencil}
S_{ij} = \frac{1}{h_x^2}
\begin{bmatrix}
-\tau\alpha/4 & \alpha^2(1+\tau^2/4) & \tau\alpha/4 \\
1 & -2\left[1+\alpha^2(1+\tau^2/4)\right]-h_x^2a_{ij} & 1 \\
\tau\alpha/4 & \alpha^2(1+\tau^2/4) &-\tau\alpha/4 \\
\end{bmatrix}
.
\end{equation}
Note that the
\emph{anisotropy} of the resulting Finite Difference discretization
is now $\alpha^2(1+\tau^2/4)$ and could be controlled by adjusting both
the mesh aspect ratio $\alpha$ and the \emph{shear} term $\tau$.
Numerical calculations show that the multigrid $V$-cycles do always
converge, as shown in Table~\ref{tab:new_mixedterm}.
\begin{table}[htb]
\centering
\begin{tabular}{|l|l|c|c|c|c|c|c|c|}\hline
&Grid size & $\tau=0$ & $\tau=1$ & $\tau=2$ & $\tau=4$ & $\tau=8$
& $\tau=16$ \\ \hline
\multirow{3}{*}{\texttt{\textbf{DDDD}}}
& $128\times 512$ &4 & 4 & 5 & 6 & 9 & 20 (6) \\
& $256\times 1024$ &4 & 4 & 5 & 6 & 11 & 25 (8)\\
& $512\times 2048$ &4 & 4 & 5 & 7 & 12 & 29 (8)\\
\hline\hline
\multirow{3}{*}{\texttt{\textbf{NNDD}}}
& $128\times 512$ &4 & 4 & 4 & 5 & 8 & 17 (5) \\
& $256\times 1024$ &4 & 5 & 5 & 5 & 8 & 19 (6) \\
& $512\times 2048$ &4 & 4 & 4 & 5 & 7 & 18 (6) \\
\hline
\end{tabular}
\caption{Effects of the mixed derivative term $\tau$ on the
performances of the $V(3,3)$ cycle. In theses runs, $a(x,y)$ is
given by Eq.~\ref{eq:density}. The mesh aspect ratio $\alpha=0.5$
was used. On the last column and shown in parenthesis are the
numbers of $V(3,3)$ cycles when $\alpha$ is reduced to 0.125 by
increasing the length $L_y$ while keeping all the other values fixed.}
\label{tab:new_mixedterm}
\end{table}
\section{Parallel Multigrid}
In order to maximize the parallel efficiency and the flexibility of
utilization, a two-dimensional domain partition scheme is chosen to
parallelize the multigrid solver. As shown below, generalization of this
procedure for higher dimensions is straightforward.
\subsection{Distributed grid coarsening}
The coarsening algorithm can be summarized as follow:
\begin{itemize}
\item Partition the grid points on each dimension at the
\emph{finest} grid level, as evenly as possible.
\item The range for each sub-grid, using \emph{global indexing} is
thus specified by $[s,e]$, with $s=0$ for the first sub-grid and
$e=N$ for the last sub-grid, $N$ being the number of grid intervals.
\item The next coarse sub-grid is thus obtained by discarding all the
\emph{odd} indexed grid points, as in the serial case.
\item This process can continue (as long as the total of number of
intervals is even) until there exists a prescribed \emph{minimum}
number of grid points on any sub-grid is reached.
\end{itemize}
\subsection{Matrix-free formulation}
Using standard \emph{matrix} to represent the discretized 2D (or higher
dimensional) operators imply an \emph{one-dimensional numbering} of
the grid nodes. For example on a 2D $N_x\times N_y$ grid, the 1D numbering
of the node $(x_{i_1},y_{i_2})$ could be defined as
\[
k=i_1+i_2\times N_x, \quad i_1=0:N_x,\;i_2=0:N_y.
\]
However, using 2D domain partition defined by
\begin{equation}
\label{eq:2dnumber}
i_1=s_1:e_1,\;i_2=s_2:e_2,
\end{equation}
with $s=(s_1,s_2)$ and $e=(e_1,e_2)$ denoting respectively the
\emph{starting} and \emph{ending} indices of a rectangular sub-domain,
result in a \emph{non-contiguous} set of the indices ${k}$ and in a
complicate structure of the partitioned matrix for the linear
operator.
On the other hand, using the \emph{stencil notation}
introduced in \cite[chap. 5.2]{Wesseling} based on the
\emph{multidimensional} node labeling as defined by
(\ref{eq:2dnumber}) for a 2D problem, one can define a simple data
structure for the partitioned operator, $A(i,\delta)$,
where the $d$-tuple $i=(i_1,\ldots,i_d)$ represents a node of the
$d$-dimensional grid and the $d$-tuple
$\delta=(\delta_1,\ldots,\delta_d)$, the
\emph{distance} between the connected nodes. The result of
$\mathbf{A}u$ can thus be defined as
\begin{equation}
\label{eq:vmx}
(\mathbf{A}u)_i = \sum_{\delta\in\mathbb{Z}^d}
A(i,\delta)u_{i+\delta}, \quad i=s:e.
\end{equation}
In (\ref{eq:vmx}), the sum is performed over all
indices $\delta$ such that $A(i,\delta)$ is non-zero. For the 2D
nine-point stencil defined in (\ref{eq:stencil}), the 2-tuple
$\delta$ can be specified as the 9 columns of the following
\emph{structure} matrix
\begin{equation}
\label{5points}
S_\delta = \left(
\begin{array}{rrrrrrrrr}
0 & -1 & 0 & 1 & -1 & 1 & -1& 0 & 1 \\
0 & -1 & -1&-1 & 0 & 0 & 1 & 1 & 1 \\
\end{array} \right).
\end{equation}
In the general case of a $d$-dimensional grid and $\mathcal{N}$ point stencil,
$S_\delta$ is a $d\times\mathcal{N}$
matrix. By noting that the subscript $i+\delta$ of $u$ on the right hand side of
(\ref{eq:vmx}) should be in the range $[0,N]$ \emph{only} for sub-domains which are
\emph{adjacent} to the boundary, one can deduce that for
a \emph{fixed} $\delta$, the lower and upper bounds of the indices $i$
should be
\begin{equation}
\begin{split}
i_{\mbox{min}} &= \max (0, -\delta, s), \\
i_{\mbox{max}} &= \min (N, N-\delta, e) \\
\end{split}
\end{equation}
where $N=(N_1,N_2,\ldots,N_d)$ specify the number of intervals,
since, for sub-domains \emph{not adjacent} to the boundary, $u$ should
include values at the \emph{ghost} cells $s-g$ and $e+g$ where $g$ is
given by
\begin{equation}
g = \max|S_\delta|
\end{equation}
with the operator max taken along the \emph{rows} of the matrix.
The formula defined in (\ref{eq:vmx}) can then be implemented as in
the \emph{pseudo} Fortran code
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{lstlisting}[mathescape]
do k=1,SIZE($S_{\delta}$,2) ! loop over the stencil points
$\delta$ = $S_{\delta}$(:,k)
lb = MAX(0,-$\delta$,$s$)
ub = MIN($N$,$N-\delta$,e)
do i=lb,ub
Au(i) = Au(i) + A(i,$\delta$)*u(i+$\delta$)
enddo
enddo
\end{lstlisting}
\nopagebreak\hrule
\addvspace{\medskipamount}
On the other hand, if the values of $u$ at the ghost cells of the
sub-domains \emph{adjacent} to the boundary are set to 0
\begin{equation*}
u_{-g} = u_{N+g} = 0,
\end{equation*}
the lower and upper bounds of the
inner loop can be simply set to $lb=s$ and $ub=e$. Note that the inner
loop should be interpreted as $d$ nested loops over the $d$-tuple
$i=(i_1,\ldots,i_d)$ for a $d$-dimensional problem.
\subsection{Inter-grid transfers}
\subsubsection{Restriction}
Using the definition in the first equation of (\ref{eq:2dintergrid})
together with (\ref{eq:1drestriction}), the 2D restriction operator
can be represented by the following 9-point stencil:
\begin{equation}
\label{eq:2drestriction}
\mathbf{R}_i = \frac{1}{16}
\begin{pmatrix}
1 & 2 & 1 \\
2 & 4 & 2 \\
1 & 2 & 1 \\
\end{pmatrix},
\end{equation}
and the restriction of $f$ can be computed as
\begin{equation}
\bar{f}_i = (\mathbf{R}f)_i = \sum_{\delta\in\mathbb{Z}^2}
R(i,\delta)f_{2i+\delta}, \quad i=\bar{s}:\bar{e},
\end{equation}
where $\bar{s},\bar{e}$ denote the partitioned domain boundary indices on the
\emph{coarse} grid, using the same algorithm described previously.
\subsubsection{BC for the restriction operator}
\label{sec:restrict_bc}
Dirichlel boundary conditions can be imposed by modifying the
\emph{restriction stencil} on each of the four boundaries as follow:
\begin{equation}
\mathbf{R}_{0,.} = \frac{1}{16}\begin{pmatrix}
1 & 2 & 0 \\
2 & 4 & 0 \\
1 & 2 & 0 \\
\end{pmatrix},\quad
\mathbf{R}_{N_x,.} = \frac{1}{16}\begin{pmatrix}
0 & 2 & 1 \\
0 & 4 & 2 \\
0 & 2 & 1 \\
\end{pmatrix},\quad
\mathbf{R}_{.,0} = \frac{1}{16}\begin{pmatrix}
0 & 0 & 0 \\
2 & 4 & 2 \\
1 & 2 & 1 \\
\end{pmatrix},\quad
\mathbf{R}_{.,N_y} = \frac{1}{16}\begin{pmatrix}
1 & 2 & 1 \\
2 & 4 & 2 \\
0 & 0 & 0 \\
\end{pmatrix}.
\end{equation}
With the natural Neumann BC, no change of the restriction operator is needed.
\subsubsection{Prolongation}
Stencil notation for \emph{prolongation} operators is less obvious to
formulate, see \cite[chap. 5.2]{Wesseling}. A more straightforward
implementation is obtained in the 2D case, by simply applying
\emph{bilinear interpolation} on the \emph{coarse grid}:
\begin{equation}
\label{eq:2dprolongation}
\begin{split}
(\mathbf{P}\bar{u})_{2i} &= \bar{u}_{i}, \\
(\mathbf{P}\bar{u})_{2i+e_1} &= (\bar{u}_{i} + \bar{u}_{i+e_1})/2, \quad
(\mathbf{P}\bar{u})_{2i+e_2} = (\bar{u}_{i} + \bar{u}_{i+e_2})/2, \\
(\mathbf{P}\bar{u})_{2i+e_1+e_2} &= (\bar{u}_{i} +
\bar{u}_{i+e_1} + \bar{u}_{i+e_2} + \bar{u}_{i+e_1+e_2})/4, \\
\end{split}
\end{equation}
\subsection{Relaxations}
While the Gauss-Seidel proves to be more efficient, the damped Jacobi
method, at least for a first version of the parallel multigrid solver,
is used because it is straightforward to \emph{parallelize}. The same
undamped Jacobi (with $\omega=1$) with a \emph{few} number of
iterations is also used to solve the linear system at the coarsest
mesh as prescribed by the multigrid $V$-cycle procedure defined in
section \ref{sec-mgProc}.
\subsection{Local vectors and stencils}
All local vectors (used to represent solutions or
right-hand-sides) contain \emph{ghost cells} and are implemented
using 2D arrays, for example
\[\mbox{\texttt{sol(s(1)-1:e(1)+1,s(2)-1:e(2)+1)}}\]
for the solution vector.
The partitioned stencils are defined
only for the \emph{local} grid points, without the ghost cells.
Thus, before each operation on the local vectors, an exchange (or
update) of the values on the ghost cells is performed.
As a result, all the memory required by the solver is completely
partitioned, except for the space used by the ghost cells.
\subsection{Numerical Experiments}
In this section, all the numerical experiments are conducted on
\texttt{helios.iferc-csc.org}, using the Intel compiler version 13.1.3
and bullxpmi-1.2.4.3. The \emph{stopping criteria} for the $V$-cycles
is based on the absolute and relative residual norms as well as the
discretization error norm as defined in section \ref{sec:NumExp1}. In
cases where the analytic solution is not known, the latter can be
replaced by some norm of the solution.
\subsubsection{Strong scaling}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/strong_256x1024_DDD}
\caption{DDDD problem for a $256\times 1024$ size, using multigrid
$V(3,3)$ cycles. Different times for a given number of
processes are obtained with different combinations of processes in
each dimension. The number of grid levels are fixed to 6. Five Jacobi
iterations are used at the coarsest grid.}
\label{fig:strong_scal_small}
\end{figure}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/strong_512x2048_DDD}
\caption{DDDD problem for a $512\times 2048$ size using multigrid
$V(3,3)$ cycles. The \textcolor{red}{red marker} on the left shows
the time for the serial multigrid solver. Different times for a
given number of processes are obtained with different combinations
of processes in each dimension. The number of grid levels are fixed
to 6. Five Jacobi iterations are used at the coarsest grid.}
\label{fig:strong_scal}
\end{figure}
Here 2 \emph{fixed} problem sizes are considered:
\begin{itemize}
\item A small size with the (fine) grid of $256\times 1024$
shown in Fig.~\ref{fig:strong_scal_small} and
\item a larger size of $512\times 2048$ in Fig.~\ref{fig:strong_scal}.
\end{itemize}
In both cases $\mbox{\tt rtol}=10^{-8}$ and $\mbox{\tt
etol}=10^{-3}$. It was checked that the results do not change when more
than 5 Jacobi iterations are used at the coarsest mesh. Notice that
for the small problem, the parallel efficiency starts to degrade at 32
MPI processes while for the larger case, this happens after 64 MPI
processes. This can be explained by
the ghost cell exchange communication overhead: denoting $N_1$ and
$N_2$, the number of grid points in each direction and $P_1$ and $P_2$
the number of MPI processes in each direction, the ratio $S/V$ between the
number of ghost points and interior grid points for each local
subdomains can be estimated as
\begin{equation}
\label{eq:comm_overhead}
S/V\simeq\frac{2(N_1/P_1+N_2/P_2)}{N_1N_2/P_1P_2} = 2\left(P_1/N_1+P_2/N_2\right).
\end{equation}
This ratio increases as the number MPI processes increases while
keeping the problem size fixed. On very coarse grids, this communication
cost can become prohibitive. For this reason, in all the runs shown
here, the number of grid points on each direction for the coarsest
grid is limited to 2.
\subsubsection{Weak Scaling}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_DDDD}
\caption{Weak scaling for a DDDD problem, using multigrid $V(3,3)$
cycles. The number of grid levels are fixed to 7. The solver for the
coarsest grid uses 5 Jacobi iterations except for the 2 largest cases
which require respectively 20 and 100 iterations to converge. The 2
sets of curves on the right figure show respectively the timings with
and without the calculations of the residual norm and discretization
error which require both a \emph{global reduction}.}
\label{fig:weak_scal_DDDD}
\end{figure}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_NNDD}
\caption{Weak scaling for a NNDD problem, using multigrid $V(3,3)$
cycles. The number of grid levels are fixed to 7. The solver for the
coarsest grid uses 5 Jacobi iterations except for the 2 largest cases
which require respectively 20 and 100 iterations to converge. The 2
sets of curves on the right figure show respectively the timings with
and without the calculations of the residual norm and discretization
error which require both a \emph{global reduction}.}
\label{fig:weak_scal_NNDD}
\end{figure}
According to Eq.~\ref{eq:comm_overhead}, varying the problem size
together with the number of MPI processes by keeping $N_1/P_1$ and
$N_2/P_2$ constant should yield a \emph{constant scaling},
provided that the convergence rate does not depend on the problem sizes.
The results for the \texttt{DDDD} and \texttt{NNDD} problems are shown
in Fig.~\ref{fig:weak_scal_DDDD} and
Fig.~\ref{fig:weak_scal_NNDD}. The left part of the figures shows that
the convergence rate depends only weakly on the problem sizes, which
leads indeed to a (almost) constant time obtained for numbers of MPI
processes $P$ between 16 and 1024 . The reason for the good timings
for smaller $P$ is simply that there are only 2 ghost cell exchanges
for $P=2\times 2$ (instead of 4 for $P\ge 16$) and that there is no
exchange for $P=0$.
\section{Non-homogeneous Boundary Conditions}
\subsection{Non-homogeneous Dirichlet Conditions}
Non-homogeneous Dirichlet boundary conditions can be imposed on all the
Dirichlet faces simply by \emph{clearing}, as for the
\emph{homogeneous case}, the matrice rows and columns and setting its
diagonal term to 1. Moreover, the corresponding corresponding
\emph{right-hand-side} should be set to:
\begin{equation}
\begin{split}
f_{0,j} &= D^W(y_j), \quad f_{N_x,j}=D^E(y_j), \qquad j=0,\ldots,N_y, \\
f_{i,0} &= D^S(x_i), \quad f_{j,N_y}=D^N(x_i), \qquad i=0,\ldots,N_x, \\
\end{split}
\end{equation}
where $D^W, D^E, D^S, D^N$ are the values of $u$ at the 4 Dirichlet
faces. As for the homogeneous Dirichlet BC, the \emph{restriction}
operator should be changed as described in section
\ref{sec:restrict_bc} while the \emph{prolongation} defined
in (\ref{eq:2dprolongation}) remains unchanged.
\subsection{Non-homogeneous Neumann Conditions}
The non-homogeneous Neumann conditions at the 4 faces $x=0$ can be
defined as
\begin{equation}
\begin{split}
\left.\frac{\partial u}{\partial x}\right|_{x=0} &= N^W(y), \quad
\left.\frac{\partial u}{\partial x}\right|_{x=L_x} = N^E(y), \\
\left.\frac{\partial u}{\partial y}\right|_{y=0} &= N^S(x), \quad
\left.\frac{\partial u}{\partial y}\right|_{y=L_y} = N^N(x). \\
\end{split}
\end{equation}
Discretization of the BC defined above, using the \emph{central
difference} yields on the 4 faces
\begin{equation}
\begin{split}
u_{-1,j} &= u_{1,j} -2h_xN^W(y_j), \quad
u_{N_x+1,j} = u_{N_x-1,j} + 2h_xN^E(y_j), \qquad j=0,\ldots,N_y, \\
u_{i,-1} &= u_{i,1} -2h_yN^S(x_i), \quad
u_{i,N_y+1} = u_{i,N_y-1} + 2h_yN^N(x_i), \qquad i=0,\ldots,N_x. \\
\end{split}
\end{equation}
With these relations, the stencil (\ref{eq:new_stencil}) on the 4
boundaries is modified as follow
\begin{equation}
\begin{split}
S^W &= \frac{1}{h_x^2}
\begin{bmatrix}
0 &\alpha^2(1+\tau^2/4) & 0 \\
0 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{0,j} & 2 \\
0 &\alpha^2(1+\tau^2/4) & 0 \\
\end{bmatrix}
,\quad
S^E = \frac{1}{h_x^2}
\begin{bmatrix}
0 &\alpha^2(1+\tau^2/4) & 0 \\
2 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{N_x,j} & 0 \\
0 &\alpha^2(1+\tau^2/4) & 0 \\
\end{bmatrix}
, \\
S^S &= \frac{1}{h_x^2}
\begin{bmatrix}
0 & 2\alpha^2(1+\tau^2/4) & 0 \\
1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,0} & 1 \\
0 & 0 & 0 \\
\end{bmatrix}
,\quad
S^N = \frac{1}{h_x^2}
\begin{bmatrix}
0 & 0 & 0 \\
1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,N_y} & 1 \\
0 & 2\alpha^2(1+\tau^2/4) & 0 \\
\end{bmatrix}
, \\
\end{split}
\end{equation}
while the right-hand-side should be changed according to
\begin{equation}
\begin{split}
f_{0,j} &\longleftarrow f_{0,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^W(y_{j-1})
+ N^W(y_j) - \frac{\tau\alpha}{4}\,N^W(y_{j+1})\right], \\
f_{N_x,j} &\longleftarrow f_{N_x,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^E(y_{j-1})
- N^E(y_j) - \frac{\tau\alpha}{4}\,N^E(y_{j+1})\right], \\
f_{i,0} &\longleftarrow f_{i,0} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^S(x_{i-1})
+ (1+\tau^2/4)N^S(x_i) - \frac{\tau}{4\alpha}\,N^S(x_{i+1})\right], \\
f_{i,N_y} &\longleftarrow f_{i,N_y} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^N(x_{i-1})
- (1+\tau^2/4)N^N(x_i) - \frac{\tau}{4\alpha}\,N^N(x_{i+1})\right]. \\
\end{split}
\end{equation}
\subsection{The NNDD test problem}
In order to test the discretization of the non-homogeneous boundary
conditions as formulated above, a test problem with the prescribed
\emph{exact} solution
\begin{equation}
u(x,y) = 1 + \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
\qquad \mbox{where $k_x$, $k_y$ are positive integers}
\end{equation}
and the following non-homogeneous boundary conditions
\begin{equation}
\begin{split}
\left.\frac{\partial u}{\partial x}\right|_{x=0} =
\left.\frac{\partial u}{\partial x}\right|_{x=L_x} &=
k_x\sin\frac{2\pi k_yy}{L_y}, \\
u(x,0) = u(x,L_y) &= 1, \\
\end{split}
\end{equation}
is solved with varying grid spacing. The discretization errors versus
the number of grid intervals $N_x$ displayed in Fig(\ref{fig:conv_nh_bc} shows
a \emph{quadratic} convergence as expected from the second order
finite differences used in both the PDE and the Neumann boundary condition
discretization.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/conv_nh_bc}
\caption{Convergence of the error
$\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals in
the $x$ direction $N_x$ for the non-homogeneous NNDD
problem. Here, $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.}
\label{fig:conv_nh_bc}
\end{figure}
As shown in Fig.(\ref{fig:nndd_nh}), the multigrid $V$-cycles for the
\emph{non-homogeneous} problem converge with a slightly smaller efficiency,
than the \emph{homogeneous} problem shown in Fig.(\ref{fig:weak_scal_NNDD}).
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_nh}
\caption{Performances of the $V(3,3)$-cycle for the non-homogeneous
NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc})
are used here.}
\label{fig:nndd_nh}
\end{figure}
\FloatBarrier
\subsection{Local relaxation methods}
In addition to the damped Jacobi, three methods of relaxations are
added in this parallel multigrid solver:
\begin{enumerate}
\item The 4 color Gauss-Seidel method (RBGS).
\item The Gauss-Seidel method (GS).
\item The successive over-relaxation method (SOR).
\end{enumerate}
In order to apply correctly the parallel 4 color Gauss-Seidel, a
complicated ghost cell exchange has to be performed for each sweep
for each color. Here we simply apply the method \emph{locally} on each
subdomain with only one ghost exchange performed at the beginning of
each relaxation.
The same procedure is also used for the other 2 methods which are
inherently \emph{serial}. All these 3 relaxations are
thus only correct if there is only one subdomain. As a
consequence, while the damped Jacobi does not depend on the
partition of the subdomains, results from these 3 methods do depend on
how the domain is partitioned.
Table~\ref{tab:advrelax} show
however that all of the 3 \emph{approximated} relaxation methods produce
a much \emph{faster} convergence rate than the damped Jacobi relaxations for the NNDD test
problem considered here. The performance of the implemented solver
using the 4 relaxation methods on HELIOS is compared in
Fig,(\ref{fig:weakhelios}. The bad performance of the 4 color
Gauss-Seidel relaxations (RBGS) can be explained by the 4 nested loops
required to sweep each of the 4 colors.
\begin{table}[hbt]
\centering
\begin{tabular}{|l||r|r|r|r|r|r|}\hline
Grid Sizes & $256\times 1024$ & $512\times 2048$ & $1024\times 4096$ & $2048\times 8192$ & $4096\times 16384$ & $8192\times 32768$ \\
\hline
Process topology &
\multicolumn{1}{c|} {$1\times 1$} &
\multicolumn{1}{c|} {$2\times 2$} &
\multicolumn{1}{c|} {$4\times 4$} &
\multicolumn{1}{c|} {$8\times 8$} &
\multicolumn{1}{c|} {$16\times 16$} &
\multicolumn{1}{c|} {$32\times 32$} \\
\hline
Jacobi $\omega=0.9$ & 0.22 & 0.24 & 0.24 & 0.24 & 0.24 & 0.25 \\
RBGS & 0.05 & 0.07 & 0.10 & 0.10 & 0.12 & 0.12 \\
GS & 0.07 & 0.08 & 0.10 & 0.11 & 0.11 & 0.12 \\
SOR $\omega=1.2$ & 0.04 & 0.05 & 0.07 & 0.07 & 0.07 & 0.08 \\
\hline
\end{tabular}
\caption{Reduction factor for the residuals (obtained as the \emph{geometric mean} of
all its values except the first 2 values) for the non-homogeneous
NNDD test problem. The same parameters as in Fig.(\ref{fig:conv_nh_bc}) are used here.}
\label{tab:advrelax}
\end{table}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.9\hsize]{\RepFigures/weak_helios}
\caption{Performance of the 4 relaxations on the non-homogeneous
NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc})
are used here. The grid sizes used in this \emph{weak scaling}
run are shown in Table~\ref{tab:advrelax}.}
\label{fig:weakhelios}
\end{figure}
\FloatBarrier
\section{Performance of the Stencil Kernel on different platform}
To get a feeling on the performances gained on the different
platforms and how well the compilers (with their
auto-vectorization capability) support these platforms,
the following Fortran \emph{9-point stencil} kernel has been used. The
OpenMP directives are used for parallelization on both Xeon and Xeon
Phi while offload to GPU card is done via the high level OpenACC
directives. \emph{First touch} is applied in the initialization of
\texttt{x} and \texttt{mat}.
\begin{lstlisting}[language=Fortran,numbers=left,commentstyle=\color{blue},keywordstyle=\color{red},frame=single]
!$omp parallel do private(ix,iy)
!$acc parallel loop present(mat,x,y) private(ix,iy)
DO iy=0,ny
DO ix=0,nx
y(ix,iy) = mat(ix,iy,1)*x(ix-1,iy-1) &
& + mat(ix,iy,2)*x(ix, iy-1) &
& + mat(ix,iy,3)*x(ix+1,iy-1) &
& + mat(ix,iy,4)*x(ix-1,iy) &
& + mat(ix,iy,0)*x(ix,iy) &
& + mat(ix,iy,5)*x(ix+1,iy) &
& + mat(ix,iy,6)*x(ix-1,iy+1) &
& + mat(ix,iy,7)*x(ix, iy+1) &
& + mat(ix,iy,8)*x(ix+1,iy+1)
END DO
END DO
!$acc end parallel loop
!$omp end parallel do
\end{lstlisting}
The performances on a Helios dual processor node and its attached Xeon
Phi co-processor are shown in Fig.~\ref{fig:cpu_mic} while the
performances on a Cray XC30 CPU and its attached NVIDIA graphics card are
shown in Fig.~\ref{fig:cpu_gpu}. In these figures, Intel optimization
flag \texttt{-O3} and default Cray optimization were applied. In
Fig.~\ref{fig:cpu_mic_O1_O3}, the speedup by vectorization is shown by
comparing performances obtained with \texttt{-O3} and \texttt{-O1}.
Several observations can be drawn from these results.
\begin{itemize}
\item The parallel scaling, using OpenMP is linear for both Intel and
Cray compilers, when the problem sizes fit into the 20MB cache of
the Sandybridge processor. For grid sizes smaller than $32\times8$, the
overhead of thread creation dominates. When the memory footprint is
larger than the cache, 4 threads per socket already saturate the memory
bandwidth.
\item On the MIC, the parallel speedup scales linearly up to 60
cores with 1 thread per core. Using 2 or 3 threads per core does not help
while with 4 threads, the performance even degrades.
\item The MIC, using the Intel \emph{mic native} mode, does not
perform better than 8 cores of the Sandybridge processor.
\item Since the benefit from \emph{vectorization} is quite large for
the MIC (see Fig.~\ref{fig:cpu_mic_O1_O3}), the poor parallel
scalability may be explained by the low flop intensity per thread
coupled with the high overhead of the (many) thread creation and
thread synchronization.
\item The NIVIDIA card, using the high level OpenACC programming
style is more than 3 times faster than 8 Sandybridge cores, for grid
sizes larger than $1024\times256$. For smaller sizes, there are not
enough flops to keep the GPU threads busy.
\end{itemize}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic}
\caption{Performance on the Helios dual processor (left) using the
\texttt{-O3} compiler option and on the MIC (right),
using the native mode \texttt{-mmic}.}
\label{fig:cpu_mic}
\end{figure}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_gpu}
\caption{Performance on a Cray XC30 single 8 core processor node
(left) and the NVIDIA card (right) using OpenACC. Default Cray
Fortran compiler optimization has been used on both runs.}
\label{fig:cpu_gpu}
\end{figure}
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic_O1_O3}
\caption{Performance comparison between using \texttt{-O3} and
\texttt{-O1}, on the Helios dual processor (left) and on the MIC (right).}
\label{fig:cpu_mic_O1_O3}
\end{figure}
\FloatBarrier
\section{Hybrid MPI+OpenMP \texttt{PARMG (r599})}
In this version, a \emph{straightforward} parallelization is
done in the subroutines \texttt{jacobi, residue, prolong, restrict} and
\texttt{norm\_vec}, using the OpenMP work sharing directives. The
ghost cell exchange is executed by the \emph{master} thread.
All the 2D arrays (solutions, RHS, etc.) are allocated and initialized
\emph{once} by the \emph{master} thread. Dynamic array allocations
during the multigrid $V$-cycles are thus avoided.
To help further optimization, timings are introduced for each of the 4
multigrid components \texttt{jacobi, residue, prolong, restrict} and
the ghost cell \texttt{exchange} as well as on the \emph{recursive}
subroutine \texttt{mg}. Since the timings of the 4 MG components
include already calls to \texttt{exchange}, the time obtained for
\texttt{mg} should be equal to the sum of the 4 MG components and
the \emph{extras} time which includes operations in \texttt{mg} but
not in the 4 components:
\begin{equation}
\label{eq:timings}
t_\text{mg} = t_\text{jacobi} + t_\text{residue} + t_\text{prolong}
+ t_\text{restrict} + t_\text{extras}.
\end{equation}
We will see in the following sections that, in addition to these 5
contributions to $t_\text{mg}$,
there is \emph{overhead} probably due to the \emph{recursive}
calls of \texttt{mg}.
\subsection{Parallel efficiency on single node}
The comparison in Fig.~\ref{fig:single_node} shows that the pure
OpenMP version is at most $30\%$ slower than the pure MPI version when all the
16 cores are used but less than $10\%$ when only one socket is
used. The degradation of the OpenMP version can be explained by the
\emph{numa} effects when 2 sockets are used. It is also observed
that the performance level off at 4 cores, due to the
saturation of the socket memory bandwidth.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.8\hsize]{\RepFigures/single_node}
\caption{Parallel performance of the 7 level $V(3,3)$-cycle on a
dual socket Helios node ($2\times 8$ cores) for pure OpenMP and pure
MPI. The non-homogeneous NNDD problem with the same parameters in
Fig.~\ref{fig:conv_nh_bc} is considered here. The OpenMP threads and MPI
tasks are placed first on the first socket before filling the
second socket, using the \texttt{srun} option
``\texttt{--cpu\_bind=cores -m block:block}'' and the environment
variable \texttt{OMP\_PROC\_BIND=true}.}
\label{fig:single_node}
\end{figure}
\subsection{Hybrid efficiency on multi-nodes}
In the following multi-node experiments, all the 16 cores on each
Helios node are
utilized. The numbers of OpenMP threads \emph{per} MPI \emph{process} NT, the
number of MPI processes \emph{per node} NP, the number of nodes NNODES
and the \emph{total} number of MPI processes $\text{NP}_{tot}$ verify
thus the following relations:
\begin{equation}
\begin{gathered}
1\le\text{NT} \le 16, \qquad 1\le\text{NP} \le 16 \\
\text{NT}\times\text{NP} = 16 \\
\text{NP}_{tot} = 16\times\text{NNODES}/\text{NT} \\
\end{gathered}
\end{equation}
The times of the different MG components and the relative
contributions for the \emph{strong scaling}
experiments using a $1024\times 4096$ grid size, are shown in
Fig.~\ref{fig:hybrid_strong} and Fig.~\ref{fig:hybrid_strong_contrib}
respectively. The following observations can be made:
\begin{enumerate}
\item The \texttt{exchange} time increases strongly with increasing
NNODES, due to smaller partitioned subdomains and thus their larger
surface/volume ratio.
\item The pure MPI (NT=1) \texttt{exchange} time is on the other
hand reduced with
$\text{NT}>1$ since the local partitioned grid becomes larger.
\item The less efficient OpenMP parallelization (numa effects,
Amdahl's law) tends to limit however this advantage.
\item As a result, there is an optimal NT for a given NNODES:
2 for 4 and 16 nodes, 8 for 64 nodes.
\item The \texttt{jacobi} and \texttt{residue} contributions dominate
largely with $0.63\le t_\text{jacobi}/t_\text{mg}\le 0.83$ and
$0.09\le t_\text{residue}/t_\text{mg}\le 0.18$.
\item The \emph{overhead} (see Eq.~\ref{eq:timings}) times increase
with NNODES but decrease slightly for increasing NT.
\end{enumerate}
The times of the different MG components and the relative
contributions for the \emph{weak scaling}
experiments are shown in Fig.~\ref{fig:hybrid_weak} and
Fig.~\ref{fig:hybrid_weak_contrib} respectively. The following
observations can be made:
\begin{enumerate}
\item A steady increase of MG times with the number of nodes can
be attributed to the increase of ghost cells \texttt{exchange} time,
even though the amount of communications between nodes does not
change.
\item The MG performance is improved slightly when NT=2 but drop
drastically for NT=16. This seems to indicate that
\emph{numa} effects are important here, since the array initialization
is not done locally on each thread.
\item The \emph{overhead} (see Eq.~\ref{eq:timings}) times are much
smaller than in the \emph{strong scaling} runs.
\end{enumerate}
\begin{sidewaysfigure}
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong}
\caption{Detailed timings for strong scaling experiments using the same
problem parameters as in Fig.~\ref{fig:single_node}, except that 5
levels are chosen to be able to run the runs with 64 nodes.}
\label{fig:hybrid_strong}
\end{sidewaysfigure}
\begin{sidewaysfigure}
%\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong_contrib}
\caption{Relative contributions of each of the MG components for the
strong scaling experiments using the same
problem parameters as in Fig.~\ref{fig:single_node}.}
\label{fig:hybrid_strong_contrib}
%\end{figure}
\end{sidewaysfigure}
\begin{sidewaysfigure}
%\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak}
\caption{Detailed timings for weak scaling experiments using the same
problem parameters as in Fig.~\ref{fig:single_node}.}
\label{fig:hybrid_weak}
%\end{figure}
\end{sidewaysfigure}
\begin{sidewaysfigure}
%\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak_contrib}
\caption{Relative contributions of each of the MG components for the
weak scaling experiments using the same
problem parameters as in Fig.~\ref{fig:single_node}.}
\label{fig:hybrid_weak_contrib}
%\end{figure}
\end{sidewaysfigure}
Finally, Table~\ref{tab:memory} shows that using $\text{NT}>1$
decreases the memory needed by the multigrid procedure for both strong
and weak scaling runs.
\begin{table}[htb]
\begin{center}
\begin{tabular}{|l|c||r|r|r|r|r|}
\hline
&\texttt{\textbf{NNODES}}& NT=1 & NT=2 & NT=4 & NT=8 & NT=16 \\
\hline
\multirow{4}{*}{\texttt{\textbf{Strong Scaling}}}
&1 & 57.01 & 53.70 & 52.04 & 51.06 & 48.57 \\
&4 & 21.87 & 21.49 & 15.75 & 13.87 & 13.11 \\
&16 & 13.82 & 8.52 & 5.75 & 5.69 & 4.00 \\
&64 & 13.93 & 6.80 & 3.73 & 2.32 & 1.48 \\
\hline\hline
\multirow{4}{*}{\texttt{\textbf{Weak Scaling}}}
&1 & 57.03 & 53.71 & 52.04 & 51.08 & 48.61 \\
&4 & 59.24 & 59.18 & 53.39 & 51.52 & 48.69 \\
&16 & 60.48 & 55.33 & 52.69 & 52.74 & 49.06 \\
&64 & 63.30 & 56.13 & 53.30 & 51.58 & 48.79 \\
\hline
\end{tabular}
\end{center}
\caption{Memory footprint \emph{per core} (MB/core) for the strong scaling and weak
scaling experiments.}
\label{tab:memory}
\end{table}
\subsection{Summary and conclusions}
The \emph{strong scaling} and \emph{weak scaling} wrt NT and NNODES
are summarized in Fig.~\ref{fig:scaling}. The speed up for the strong
scaling experiments shows a good efficiency up to 16 nodes
for all NT but degrades at 64 nodes (1024 cores) due the partitioned
grid becoming too small. A good \emph{weak scaling} is also
obtained with an increase in $t_\text{mg}$ of
less than $10\%$ when NNODES vary from 4 to 64. However, for NT=16, the
efficiency drops significantly, due to the non-local memory access
when the OpenMP threads are placed on both sockets (\emph{numa} effect).
In order to improve the hybrid MPI+OpenMP multigrid, especially for large number
of threads per MPI process NT, the following optimizations should be done:
\begin{itemize}
\item \emph{First touch} array initialization in order to avoid
\emph{numa} effects.
\item OpenMP parallelization of some remaining \emph{serial} loops.
\item Better vectorization of inner loops.
\end{itemize}
The outcome of these optimization steps is important in order to run
efficiently on upcoming \emph{multicore} processors and
\emph{manycore} (MIC) devices.
\begin{figure}[htb]
\centering
\includegraphics[angle=0,width=0.9\hsize]{\RepFigures/scaling}
\caption{Strong scaling with a $1024\times 4096$ grid size (left) and
weak scaling (right) with grid sizes $1024\times 4096,10248\times 8192,
4096\times 16384$ and $8192\times32768$ respectively for 1, 4, 16
and 64 nodes.}
\label{fig:scaling}
\end{figure}
\FloatBarrier
\pagebreak
\begin{thebibliography}{99}
\bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
Multigrid Tutorial, Second Edition, SIAM (2000)}.
\bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}.
\bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\
\url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf}
\bibitem{TEMPL} {R. Barrett, M. Berry, T. F. Chan,
J. Demmel, J. Donato, J. Dongarra, V. Eijkhout,
R. Pozo, C. Romine and H. Van der Vorst,
Templates for the Solution of Linear Systems: Building Blocks for
Iterative Methods, 2nd Edition , SIAM, (1994)}.
\bibitem{Wesseling} {P.~Wesseling, An Introduction to Multigrid
Methods, Edwards, 2004}.
\bibitem{salpha} {X. Lapillonne, S. Brunner, T. Dannert, S. Jolliet,
A. Marinoni et al., Phys. Plasmas 16, 032308 (2009)}.
\end{thebibliography}
\end{document}
diff --git a/multigrid/docs/multigrid.tex b/multigrid/docs/multigrid.tex
index a8f694d..4af4d6b 100644
--- a/multigrid/docs/multigrid.tex
+++ b/multigrid/docs/multigrid.tex
@@ -1,949 +1,949 @@
%
% @file multigrid.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
%\usepackage{verbatim}
%\usepackage[notref]{showkeys}
\title{\tt Multigrid Formulation for Finite Elements.}
\author{Trach-Minh Tran, Stephan Brunner}
\date{v0.2, October 2012}
\abstract{A multigrid formulation for finite elements is
derived, using variational principles. More specifically the grid
transfer operators will be derived and tested in 1D Cartesian,
cylindrical and spherical geometry for arbitrary order B-Splines.}
\begin{document}
\maketitle
\tableofcontents
\section{The discretized problem}
Consider the one-dimensional linear integro-differential problem
\begin{equation}
\label{eq:oned_prob}
\mathcal{L}(u) = f, \qquad 0\le x\le L,
\end{equation}
with suitable boundary conditions. On an \emph{equidistant} mesh with
interval $h=L/N$ and using the \emph{weak form} of Eq.~(\ref{eq:oned_prob}),
the linear system to be solved on this grid (which will be
referred as the \emph{fine} grid) can be written as (see
\cite{SOLVERS}, \cite{BSPLINES}):
\begin{equation}
\label{eq:fine}
\sum_{i'=1}^{N+p}A_{ii'}^hu^h_{i'} = b^h_i, \qquad
A^h_{ii'}=\int_{0}^{L}\Lambda^h_i
\mathcal{L}(\Lambda^h_{i'})\,x^{\alpha}dx, \qquad
b^h_i = \int_{0}^{L}f\Lambda^h_i\,x^{\alpha}dx,
\end{equation}
where $p$ is the order the Splines $\Lambda^h_i$ and
$\alpha=0,1,2$ for Cartesian, cylindrical and spherical coordinates
respectively. It should be noted that the unknowns $u^h_i$ of this
linear system are the \emph{expansion coefficients} of the discretized
solution of the problem $u^h(x)$
\begin{equation}
u^h(x) = \sum_{i'=1}^{N+p}u^h_{i'}\Lambda^h_{i'}(x)
\end{equation}
and the right hand sides $b^h_i$ are defined as the \emph{projection} of
$f(x)$ on the same basis functions, in contrast with the Finite
Differences (FD) or Finite Volume (FV) formulations where $u^h_i$ and
$b^h_i$ are the \emph{nodal values} of $u$ and $f$.
On the \emph{coarser} mesh with interval $2h=2L/N$, the discretized
linear system can be written as
\begin{equation}
\label{eq:coarse}
\sum_{i'=1}^{N/2+p}A_{ii'}^{2h}u^{2h}_{i'} = b^{2h}_i, \qquad
A^{2h}_{ii'}=\int_{0}^{L}\Lambda^{2h}_i
\mathcal{L}(\Lambda^{2h}_{i'})\,x^{\alpha}dx, \qquad
b^{2h}_i = \int_{0}^{L}f\Lambda^{2h}_i\,x^{\alpha}dx.
\end{equation}
\section{Transfer operators}
\label{sec:twogrid}
For simplicity let consider the two-grid procedure \cite{Briggs} which can be
summarized as follow:
\begin{enumerate}
\item Obtain an approximation $\mathbf{u}^h$ on the \emph{fine} grid, using a
Gauss-Seidel (GS) or a weighted Jacobi scheme. This procedure is
also called \emph{smoothing} or \emph{relaxation}.
\item Compute the \emph{residuals}:
$\mathbf{r}^h=\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h$.
\item Obtain the residuals on the coarse mesh $\mathbf{r^{2h}}$ by
\emph{restriction} of $\mathbf{r}^h$.
\item Direct solve $\mathbf{A}^{2h}\mathbf{e}^{2h}=\mathbf{r^{2h}}$ to obtain
the error $\mathbf{e}^{2h}=\mathbf{u}-\mathbf{u}^{2h}$
\item Interpolate (\emph{prolong}) the error to obtain $\mathbf{e}^h$.
\item Correct the approximation obtained on the fine grid:
$\mathbf{u}^h\leftarrow \mathbf{u}^h+\mathbf{e}^h$.
\item Relax on $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{f^{h}}$, using
the previously computed $\mathbf{u}^h$ as a guess.
\end{enumerate}
Steps 3 and 5 are called \emph{grid transfers} and are detailed in the
following. It should be noted that the fine to coarse transfer
(restriction) applies to the right hand side $\mathbf{b}^h$ while the
prolongation applies to the expansion coefficients $\mathbf{u}^{2h}$.
\subsection{Fine to coarse grid transfer (restriction)}
The right hand side on the fine and coarse grid can be written as
\begin{equation*}
\begin{split}
b^{h}_i &= \int_0^Lf\Lambda^{h}_i\,x^{\alpha}dx =
\sum_{i'=1}^{N+p}f^h_{i'}\underbrace{\int_0^L\Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_
{M^{h,h}_{ii'}}, \\
b^{2h}_i &= \int_0^Lf\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}f^h_{i'}
\underbrace{\int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_{M^{2h,h}_{ii'}} \\
\end{split}
\end{equation*}
where the expansion $f(x)=\sum_{i=1}^{N+p}f^h_i\Lambda^h_i(x)$ has been used.
Elimination of $\mathbf{f^h}$ leads to the definition of the
\emph{restriction} matrix:
\begin{equation}
\label{eq:restriction}
\mathbf{b}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{b}^h,
\qquad \boxed{\mathbf{R}^{2h}_{h}=\mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1}}.
\end{equation}
Note that the computation of the \emph{mass matrices}
$\mathbf{M}^{h,h}$ and $\mathbf{M}^{2h,h}$ can be done \emph{exactly}
using a Gauss integration with $N_G=\lceil p+(\alpha+1)/2 \rceil$ points.
Another way to derive the restriction operator $\mathbf{R}^{2h}_{h}$
is by noting that the basis functions $\Lambda^{2h}_{i}$ are
\emph{piecewise} $C^{p-1}_h$ \emph{polynomials} with \emph{breaks} on
the fine grid points $x_i=ih$, and thus can be expressed
\emph{uniquely} as
\begin{equation}
\Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}c_{ii'}\Lambda^{h}_{i'}(x),
\quad i=1\ldots N/2+p.
\end{equation}
Projecting this equation on the basis $\Lambda^{h}_j$ then leads to
\begin{equation*}
\begin{split}
\sum_{i'=1}^{N+p}c_{ii'}\int_0^L\Lambda^{h}_{i'}\Lambda^{h}_{j}\,x^{\alpha}dx
&= \int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{j}\,x^{\alpha}dx, \qquad
i=1\ldots N/2+p, \quad j=1,\ldots N+p \\
\Longrightarrow \mathbf{c}\cdot\mathbf{M}^{h,h} &=
\mathbf{M}^{2h,h} \Longrightarrow \mathbf{c} =
\mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1} = \mathbf{R}^{2h}_{h} \\
\end{split}
\end{equation*}
and finally
\begin{equation}
\label{eq:restrict_gen}
\boxed{\Lambda^{2h}_{i}(x) =
\sum_{i'=1}^{N+p}\left(\mathbf{R}^{2h}_{h}\right)_{ii'}\Lambda^{h}_{i'}(x),
\quad i=1\ldots N/2+p}
\end{equation}
Because the expansion coefficients $c_{ii'}$ of $\Lambda^{2h}_{i}(x)$
(rows of the restriction matrix $\mathbf{R}^{2h}_{h}$) on the
fine mesh basis are \emph{unique}, $\mathbf{R}^{2h}_{h}$ should be
independent of the geometry exponent $\alpha$ or more generally, of the
definition of the \emph{projection} (or scalar product) used to
calculate the restriction matrix. Furthermore, since the supports of
both $\Lambda^{h}_i$ and $\Lambda^{2h}_i$ are \emph{compact}, the
matrix $\mathbf{R}^{2h}_{h}$ should be \emph{sparse}.
One can show that, using (\ref{eq:restrict_gen}), the
restriction of the fine mesh FE matrix $\mathbf{A}^h$ is given by
\begin{equation}
\label{eq_coarse_mat}
\mathbf{A}^{2h} = \mathbf{R}^{2h}_h\mathbf{A}^{h}\left(\mathbf{R}^{2h}_h\right)^{T}.
\end{equation}
\subsection{Coarse to fine grid transfer (prolongation)}
Let denote the discretized solution on the coarse mesh of
$\mathbf{A}^{2h}\mathbf{u}^{2h}=\mathbf{R}^{2h}_{h}\mathbf{b}^h$
by
\begin{equation*}
u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x),
\end{equation*}
and seek for an approximated solution on the fine mesh $\mathbf{u}^{h}$
\begin{equation*}
u^{h}(x) = \sum_{i=1}^{N+p}u^{h}_{i}\Lambda^{h}_{i}(x).
\end{equation*}
by \emph{prolongation} of $\mathbf{u}^{2h}$ (instead of solving
$\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{b}^h$). A reasonable solution is
to \emph{minimize} the square of the error norm defined as
\begin{equation*}
\begin{split}
\epsilon^2 &= \|u^{h}(x)-u^{2h}(x)\|^2 \equiv \int_0^L
[u^{h}(x)-u^{2h}(x)]^2\,x^\alpha dx, \\
\frac{\partial\epsilon^2}{\partial u^h_i} &=0 \Longrightarrow
\sum_{i'=1}^{N+p}u^{h}_{i}\int_0^L
\Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^\alpha dx =
\sum_{i'=1}^{N/2+p}u^{2h}_{i}\int_0^L
\Lambda^{h}_{i}\Lambda^{2h}_{i'}\,x^\alpha dx. \\
\end{split}
\end{equation*}
This yields the prolonged (or interpolated) \emph{coarse grid}
solution on the \emph{fine grid}
\begin{equation}
\label{eq:prolongation}
\mathbf{u}^h = \mathbf{P}^h_{2h}\mathbf{u}^{2h}, \qquad
\boxed{\mathbf{P}^h_{2h} =
(\mathbf{M}^{h,h})^{-1}\mathbf{M}^{h,2h}=(\mathbf{R}^{2h}_{h})^T}
\end{equation}
and the coarse FE matrix can be finally expressed as\begin{equation}
\label{eq:coarse_mat}
\boxed{\mathbf{A}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{A}^{h}\mathbf{P}^h_{2h}}
\end{equation}
\subsection{An alternative derivation of grid transfer operators}
Starting from the inter grid transformation of the basis functions
Eq.(\ref{eq:restrict_gen}), the
restriction of $\mathbf{b}^h$ and the prolongation of
$\mathbf{u}^{2h}$ can be derived as follow
\begin{gather*}
b^{2h}_i = \int_0^L f\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}
\left(\mathbf{R}^{2h}_{h}\right)_{ii'}\int_0^Lf\Lambda^{h}_{i'}\,x^{\alpha}dx
=\sum_{i'=1}^{N+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'}b^h_{i'}, \\
u^{2h}(x) = \sum_{i=1}^{N/2+p} u^{2h}_{i} \Lambda^{2h}_{i}=
\sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p}
\left(\mathbf{R}^{2h}_{h}\right)_{ii'}
u^{2h}_{i}\right]}_{u^h_{i'}}\Lambda^{h}_{i'}(x) \Longrightarrow
\mathbf{u}^h = \left(\mathbf{R}^{2h}_{h}\right)^T\mathbf{u}^{2h} =
\mathbf{P}^h_{2h}\mathbf{u}^{2h}.\\
\end{gather*}
\section{Numerical results for the transfer operators}
The prolongation matrix as defined in Eq.~(\ref{eq:prolongation}) was
calculated using the BSPLINES module. A Gauss integration with
$N_G=\lceil p+(\alpha+1)/2 \rceil$ points is used to carry out the
numerical integrations.
In the following, the results are presented
for linear, quadratic and cubic Splines. Since the
restriction matrix is just the transpose of the prolongation matrix,
only the latter is shown. As expected, all the obtained matrices
are found to be \emph{independent} of $\alpha$ and \emph{sparse}.
During the calculations, it was checked that
\begin{itemize}
\item The coarse matrix computed using Eq.~(\ref{eq:coarse_mat}) and
the transfer matrix, is identical to the matrix assembled directly
on the coarse grid.
\item The sum of each row of the prolongation matrix is 1, since a
constant function ($\mathbf{u}^{2h}=1$) should remain constant after
the grid transfer.
\end{itemize}
\subsection{Linear Splines}
For $N=8$, the prolongation is a $9\times 5$ matrix given by
\begin{equation}
\mathbf{P}^{h}_{2h} =
\left(
\begin{matrix}
1 & 0 & 0 & 0 & 0 \\
1/2 & 1/2 & 0 & 0 & 0 \\
0 & 1 & 0 & 0 & 0 \\
0 & 1/2 & 1/2 & 0 & 0 \\
0 & 0 & 1 & 0 & 0 \\
0 & 0 & 1/2 & 1/2 & 0 \\
0 & 0 & 0 & 1 & 0 \\
0 & 0 & 0 & 1/2 & 1/2\\
0 & 0 & 0 & 0 & 1 \\
\end{matrix}\right)
\end{equation}
As expected, the prolongation matrix for linear Splines is
identical to the one obtained for first order FD discretization, where
a linear interpolation is used. One can easily check that
\begin{equation*}
\begin{split}
\Lambda^{2h}_1(x) &= \Lambda^h_1(x) + \frac{1}{2}\Lambda^h_2(x), \\
\Lambda^{2h}_2(x) &= \frac{1}{2}\Lambda^h_2(x) + \Lambda^h_3(x) +
\frac{1}{2}\Lambda^h_4(x), \\
\end{split}
\end{equation*}
as expected from (\ref{eq:restrict_gen}).
\subsection{Quadratic Splines}
For $N=8$, the prolongation is a $10\times 6$ matrix given by
\begin{equation}
\mathbf{P}^{h}_{2h} =
\left(
\begin{matrix}
1 & 0 & 0 & 0 & 0 & 0 \\
1/2 & 1/2 & 0 & 0 & 0 & 0 \\
0 & 3/4 & 1/4 & 0 & 0 & 0 \\
0 & 1/4 & 3/4 & 0 & 0 & 0 \\
0 & 0 & 3/4 & 1/4 & 0 & 0 \\
0 & 0 & 1/4 & 3/4 & 0 & 0 \\
0 & 0 & 0 & 3/4 & 1/4 & 0 \\
0 & 0 & 0 & 1/4 & 3/4 & 0 \\
0 & 0 & 0 & 0 & 1/2 & 1/2\\
0 & 0 & 0 & 0 & 0 & 1 \\
\end{matrix}\right)
\end{equation}
\subsection{Cubic Splines}
For $N=10$, the prolongation is a $13\times 8$ matrix given by
\begin{equation}
\mathbf{P}^{h}_{2h} =
\left(
\begin{matrix}
1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
1/2 & 1/2 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 3/4 & 1/4 & 0 & 0 & 0 & 0 & 0 \\
0 & 3/16 & 11/16 & 1/8 & 0 & 0 & 0 & 0 \\
0 & 0 & 1/2 & 1/2 & 0 & 0 & 0 & 0 \\
0 & 0 & 1/8 & 3/4 & 1/8 & 0 & 0 & 0 \\
0 & 0 & 0 & 1/2 & 1/2 & 0 & 0 & 0 \\
0 & 0 & 0 & 1/8 & 3/4 & 1/8 & 0 & 0 \\
0 & 0 & 0 & 0 & 1/2 & 1/2 & 0 & 0 \\
0 & 0 & 0 & 0 & 1/8 & 11/16 & 3/16 & 0 \\
0 & 0 & 0 & 0 & 0 & 1/4 & 3/4 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 1/2 & 1/2\\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\
\end{matrix}\right)
\end{equation}
Note that from the results shown above, it is straightforward to derive the
prolongation matrix for other number of intervals $N$.
\section{Practical Considerations}
\subsection{Boundary conditions}
The \emph{essential Dirichlet boundary conditions} are imposed by zeroing the column
and row (first column and first row for the left boundary and last column
and last row for the right boundary) of the FE matrix $\mathbf{A}^h$ and
putting 1 on the diagonal. The same operation should be also performed
on the prolongation matrix, preserving thus the relation
(\ref{eq:coarse_mat}). For non-homogeneous Dirichlet boundary
conditions, the elements of the column should be saved before the
zeroing operation (for example $A^h_{21}, A^h_{31}, \ldots$ for the left
boundary condition). They will be used later to modify the right hand
side:
\begin{equation*}
b^h_i \leftarrow b^h_i-A^h_{i1}u^h_1, \quad i=2,\ldots
\end{equation*}
Nothing has to be done for \emph{natural boundary conditions}.
\subsection{Residual norm and error}
The residual norm is simply defined as the Euclidean norm of the
residue:
\begin{equation}
\label{eq:resid}
\|r\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2 =
\sqrt{\sum_i\left(b_i-\sum_{i'}A_{ii'}u_{i'}\right)^2}.
\end{equation}
When the \emph{exact} solution $u(x)$ is known, the \emph{discretization error}
can defined as
\begin{equation}
\label{eq:discerr}
\|e\|_2 = \sqrt{\int x^\alpha dx\left[\sum_{i}u_{i}\Lambda_i(x)-u(x)\right]^2}
\end{equation}
and computed using a Gauss quadrature. Note that for Splines of order
$p$, $\|e\|_2(h)$ converges to zero as $O(h^{p+1})$.
\section{The Model Problems}
\subsection{Cartesian geometry}
The following second-order boundary value problem is considered:
\begin{equation}
\label{eq:cartesian_problem}
\begin{split}
-\frac{d^2}{dx^2} u(x) + \sigma u(x) &= \sin (\pi kx), \qquad 0\le x\le 1 \\
u(0)=u(1) &= 0 \\
\Rightarrow u(x) = \frac{\sin(\pi kx)}{\pi^2k^2+\sigma}.& \\
\end{split}
\end{equation}
Using the weak form, the FE discretized matrix and right hand side can
be computed as
\begin{equation}
A_{ii'} = \int_0^1dx\left[\Lambda'_i(x)\Lambda'_{i'}(x) +
\sigma\Lambda_i(x)\Lambda_{i'}(x)\right], \qquad
b_i = \int_0^1dx \sin (\pi kx)\Lambda_i(x).
\end{equation}
For Splines of order $p$, the integration is done with a $\lceil
p+1/2\rceil$ point Gauss quadrature which is \emph{exact}
for the matrix $\mathbf{A}$ if $\sigma$ is constant.
The boundary conditions are simply imposed by setting
\begin{equation*}
A_{ki}=A_{ik}=\delta_{ik} \qquad\mbox{and} \qquad b_k=0
\end{equation*}
for $k=1$ (the first equation) and $N+p$ (the last equation).
\subsection{Cylindrical geometry}
The following second-order boundary value problem is considered:
\begin{equation}
\begin{split}
-\frac{1}{r}\frac{d}{dr}r\frac{d}{dr}u(r) + \frac{m^2}{r^2}u(r) &=
j^2_{ms}J_{m}(j_{ms}r), \qquad 0\le r\le 1, \quad j_{ms} =
s^{th}\mbox{ zero of }J_{m}, \\
u(1) &= 0 \\
\Rightarrow u(r) = J_{m}(j_{ms}r).& \\
\end{split}
\end{equation}
Using the weak form, the FE discretized matrix and right hand side can
be computed as
\begin{equation}
A_{ii'} = \int_0^1rdr\left[\Lambda'_i(r)\Lambda'_{i'}(r) +
\frac{m^2}{r^2}\Lambda_i(r)\Lambda_{i'}(r)\right], \qquad
b_i = \int_0^1rdr j^2_{ms}\,J_{m}(j_{ms}r)\Lambda_i(r).
\end{equation}
The boundary condition has only to be imposed on the last equation,
using the same procedure described for the Cartesian geometry.
It should be noted here that for $m\neq 0$, the matrix elements
$A_{1i}$ and $A_{i1}$ \emph{diverge} since $\Lambda_1(r)$ is not
equal to zero at $r=0$. However, using a \emph{direct solver}, one can observe
that the resulting \emph{discretization errors} as defined by
Eq.(\ref{eq:discerr}) converge for number of Gauss points $N_G$ slightly
larger than $p+1$, as shown in Table~\ref{tab:gauss_conv}. Then, using
$N_G=4$ and $6$ for the linear and cubic splines respectively, the
discretization error as a function of the number of grid intervals
(Fig~\ref{fig:cyl_conv}) show the expected quadratic and quartic
scaling respectively for the linear and cubic Splines.
\begin{figure}
\centering
\includegraphics[angle=0,width=0.8\hsize]{cyl_conv}
\caption{Discretization errors $\|e\|_2$ obtained by a \emph{direct
solver} versus the number of grid intervals $N$. A linear fit
yields a quadratic scaling ($\sim N^{-2.0}$) for the linear Splines
and a quartic convergence ($\sim N^{-4.3}$) for the cubic Splines.}
\label{fig:cyl_conv}
\end{figure}
\begin{table}
\centering
\begin{tabular}{|c|c|c|}\hline
Number of Gauss points & $p=1$ & $p=3$ \\ \hline
2 & 8.319E-04 & \\
4 & 9.277E-04 & 5.799E-07 \\
6 & 9.276E-04 & 5.936E-07 \\
8 & 9.276E-04 & 5.936E-07 \\\hline
\end{tabular}
\caption{Convergence of the \emph{discretization error} with respect
to the number of Gauss points for the cylindrical problem with $m=1$,
$s=10$ on a $128$ interval grid.}
\label{tab:gauss_conv}
\end{table}
\section{The Multigrid Schemes}
The two grid procedure described in section (\ref{sec:twogrid}) can be
generalized as follow.
Let $\nu_1$, $\nu_2$ and $\mu$ be three iteration parameters.
Given a guess $\mathbf{u}^h$ and right hand side $\mathbf{b}^h$ at the
\emph{finest} level, a MG cycle represented by
\begin{equation*}
\boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)}
\end{equation*}
will compute a \emph{new} $\mathbf{u}^h$ and is defined recursively by the
following steps:
\begin{enumerate}
\item If $h$ is the coarsest mesh size, direct solve
$\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return.
\item Else
\begin{itemize}
\item Relax $\mathbf{u}^h$ $\nu_1$ times.
\item $\mathbf{b}^{2h} \leftarrow
\mathbf{R}^{2h}_h(\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h), \quad
\mathbf{u}^{2h}\leftarrow 0$.
\item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{b}^{2h})$ $\mu$ times.
\item $\mathbf{u}^h\leftarrow
\mathbf{u}^h+\mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$.
\item Relax $\mathbf{u}^h$ $\nu_2$ times.
\end{itemize}
\end{enumerate}
The standard $V$-cycle is obtained for $\mu=1$ while $\mu=2$ results in
the $W$-cycle. Usually the number of \emph{pre-smooth} and
\emph{post-smooth} sweeps $\nu_1$ and $\nu_2$ is limited to 1 or 2. In the
following a $V$-cycle will be denoted by $V(\nu_1,\nu_2)$.
Another multigrid algorithm called \emph{Full Multigrid} or FMG does
not require an input guess $\mathbf{u}^h$ but solves first the
problem on coarser grids and uses one or many MG cycles to obtain the problem
solution. It can be represented by
\begin{equation*}
\boxed{\mathbf{u}^h \leftarrow FMG^h(\mathbf{b}^h)}
\end{equation*}
and defined recursively by the following steps:
\begin{enumerate}
\item If $h$ is the coarsest mesh size, direct solve
$\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return.
\item Else
\begin{itemize}
\item $\mathbf{b}^{2h} \leftarrow \mathbf{R}^{2h}_h(\mathbf{b}^h)$.
\item $\mathbf{u}^{2h} \leftarrow FMG^{2h}(\mathbf{b}^{2h})$.
\item $\mathbf{u}^h \leftarrow \mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$.
\item $\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)$ $\nu_0$ times.
\end{itemize}
\end{enumerate}
Note that while the MG process is an iterative process (started by
setting for example the initial guess $\mathbf{u}^h=0$), the FMG is
more like a \emph{direct solver} with appropriate values of $\nu_1$,
$\nu_2$ and $\nu_0$ determined experimentally.
\section{Numerical Experiments}
The residual norm $\|r\|_2$ and error $\|e\|_2$ defined
previously are reported after each $V$-cycle in
Table~\ref{tab:cartesian1} for the Cartesian model problem and in
Table~\ref{tab:cylindrical1} for the cylindrical one . The ratio
between successive cycle $\|r\|_2$
and $\|e\|_2$ are shown in columns labeled \emph{ratio} and measure
the rate of iteration convergence. The \emph{asymptotic} ratio of
$\|r\|_2$ is called the \emph{convergence factor}.
In all the cases shown, one can note that $\|e\|_2$ level off quickly to the
discretization error obtained by using the \emph{direct solver} on the
finest grid, while the residual norms $\|r\|_2$ continue to decrease until
the machine zero is eventually reached. One can also verify that the \emph{final}
discretization errors scale approximately as $8^2$ and $8^4$
respectively for linear and cubic Splines, as $N$ is increased from
$128$ to $1024$.
Most interestingly, the \emph{iterative performance} depends very weakly
on the problem size $N$, for both the Cartesian and the cylindrical
cases. Moreover, the multigrid seems to be less efficient when linear Splines are used for
the problem discretization. This iterative performance can be further improved by
increasing the \emph{iteration parameters} $\nu_1$, $\nu_2$ and $\mu$,
as shown in Table~\ref{tab:improv}. One can also observe in the same
table that the Jacobi relaxation is systematically less efficient than Gauss
Seidel relaxation.
\begin{table}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
\multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline
& \multicolumn{4}{c|}{ $N=128$}
& \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio &
$\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
0 & 6.219E-02 & & 7.164E-04 & & 2.210E-02 & & 7.164E-04 & \\
1 & 2.169E-02 & 0.35 & 5.880E-05 & 0.08 & 9.699E-03 & 0.44 & 3.622E-05 & 0.05 \\
2 & 3.801E-03 & 0.18 & 7.806E-06 & 0.13 & 1.790E-03 & 0.18 & 1.965E-06 & 0.05 \\
3 & 5.061E-04 & 0.13 & 3.666E-06 & 0.47 & 2.923E-04 & 0.16 & 1.583E-07 & 0.08 \\
4 & 6.762E-05 & 0.13 & 3.564E-06 & 0.97 & 4.055E-05 & 0.14 & 6.197E-08 & 0.39 \\
5 & 8.902E-06 & 0.13 & 3.585E-06 & 1.01 & 5.586E-06 & 0.14 & 5.655E-08 & 0.91 \\
6 & 1.199E-06 & 0.13 & 3.589E-06 & 1.00 & 7.122E-07 & 0.13 & 5.622E-08 & 0.99 \\
7 & 1.585E-07 & 0.13 & 3.590E-06 & 1.00 & 9.815E-08 & 0.14 & 5.620E-08 & 1.00 \\
8 & 2.089E-08 & 0.13 & 3.590E-06 & 1.00 & 1.320E-08 & 0.13 & 5.619E-08 & 1.00 \\
9 & 2.746E-09 & 0.13 & 3.590E-06 & 1.00 & 1.887E-09 & 0.14 & 5.619E-08 & 1.00 \\
10 & 3.741E-10 & 0.14 & 3.590E-06 & 1.00 & 2.533E-10 & 0.13 & 5.619E-08 & 1.00 \\
\hline \hline
\multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline
& \multicolumn{4}{c|}{ $N=128$}
& \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio &
$\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
0 & 6.187E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\
1 & 1.948E-04 & 0.00 & 1.893E-06 & 0.00 & 1.685E-05 & 0.00 & 4.292E-08 & 0.00 \\
2 & 4.316E-06 & 0.02 & 3.927E-09 & 0.00 & 1.241E-07 & 0.01 & 7.156E-11 & 0.00 \\
3 & 1.554E-07 & 0.04 & 2.374E-09 & 0.60 & 4.184E-09 & 0.03 & 6.198E-13 & 0.01 \\
4 & 5.750E-09 & 0.04 & 2.373E-09 & 1.00 & 1.560E-10 & 0.04 & 5.635E-13 & 0.91 \\
5 & 2.153E-10 & 0.04 & 2.373E-09 & 1.00 & 5.912E-12 & 0.04 & 5.635E-13 & 1.00 \\
6 & 8.122E-12 & 0.04 & 2.373E-09 & 1.00 & 2.258E-13 & 0.04 & 5.635E-13 & 1.00 \\
7 & 3.079E-13 & 0.04 & 2.373E-09 & 1.00 & 8.777E-15 & 0.04 & 5.635E-13 & 1.00 \\
8 & 1.173E-14 & 0.04 & 2.373E-09 & 1.00 & 1.758E-15 & 0.20 & 5.635E-13 & 1.00 \\
9 & 4.489E-16 & 0.04 & 2.373E-09 & 1.00 & 1.709E-15 & 0.97 & 5.635E-13 & 1.00 \\
10 & 9.571E-17 & 0.21 & 2.373E-09 & 1.00 & 1.761E-15 & 1.03 &
5.635E-13 & 1.00 \\ \hline
\end{tabular}
\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
for a \emph{Cartesian} problem with $k=10$ and $\sigma=0$,
discretized on a grid with $N=128$ and $1024$ intervals, using
linear and cubic B-splines. For
both grid sizes, a total of 6 grid levels were considered.}
\label{tab:cartesian1}
\end{table}
\begin{table}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
\multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline
& \multicolumn{4}{c|}{ $N=128$}
& \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio &
$\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
0 & 1.789E+01 & & 9.354E-02 & & 6.400E+00 & & 9.354E-02 & \\
1 & 3.373E+00 & 0.19 & 3.068E-03 & 0.03 & 1.826E+00 & 0.29 & 3.036E-03 & 0.03 \\
2 & 4.895E-01 & 0.15 & 8.064E-04 & 0.26 & 3.133E-01 & 0.17 & 1.624E-04 & 0.05 \\
3 & 6.160E-02 & 0.13 & 6.704E-04 & 0.83 & 4.581E-02 & 0.15 & 1.411E-05 & 0.09 \\
4 & 8.013E-03 & 0.13 & 6.811E-04 & 1.02 & 5.959E-03 & 0.13 & 1.062E-05 & 0.75 \\
5 & 9.871E-04 & 0.12 & 6.844E-04 & 1.00 & 8.098E-04 & 0.14 & 1.069E-05 & 1.01 \\
6 & 1.283E-04 & 0.13 & 6.847E-04 & 1.00 & 1.048E-04 & 0.13 & 1.070E-05 & 1.00 \\
7 & 1.613E-05 & 0.13 & 6.847E-04 & 1.00 & 1.504E-05 & 0.14 & 1.070E-05 & 1.00 \\
8 & 2.097E-06 & 0.13 & 6.847E-04 & 1.00 & 2.050E-06 & 0.14 & 1.070E-05 & 1.00 \\
9 & 2.639E-07 & 0.13 & 6.847E-04 & 1.00 & 3.008E-07 & 0.15 & 1.070E-05 & 1.00 \\
10 & 3.500E-08 & 0.13 & 6.847E-04 & 1.00 & 4.074E-08 & 0.14 & 1.070E-05 & 1.00 \\
\hline \hline
\multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline
& \multicolumn{4}{c|}{ $N=128$}
& \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio &
$\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
0 & 1.768E+01 & & 9.354E-02 & & 6.399E+00 & & 9.354E-02 & \\
1 & 4.243E-02 & 0.00 & 4.727E-05 & 0.00 & 4.975E-03 & 0.00 & 6.588E-06 & 0.00 \\
2 & 1.378E-03 & 0.03 & 1.897E-06 & 0.04 & 7.835E-05 & 0.02 & 6.578E-09 & 0.00 \\
3 & 4.773E-05 & 0.03 & 1.814E-06 & 0.96 & 2.797E-06 & 0.04 & 4.125E-10 & 0.06 \\
4 & 2.174E-06 & 0.05 & 1.814E-06 & 1.00 & 1.041E-07 & 0.04 & 4.092E-10 & 0.99 \\
5 & 4.816E-07 & 0.22 & 1.814E-06 & 1.00 & 3.935E-09 & 0.04 & 4.092E-10 & 1.00 \\
6 & 1.942E-07 & 0.40 & 1.814E-06 & 1.00 & 1.499E-10 & 0.04 & 4.092E-10 & 1.00 \\
7 & 8.887E-08 & 0.46 & 1.814E-06 & 1.00 & 5.757E-12 & 0.04 & 4.092E-10 & 1.00 \\
8 & 4.449E-08 & 0.50 & 1.814E-06 & 1.00 & 2.517E-13 & 0.04 & 4.092E-10 & 1.00 \\
9 & 2.377E-08 & 0.53 & 1.814E-06 & 1.00 & 1.360E-13 & 0.54 & 4.092E-10 & 1.00 \\
10 & 1.328E-08 & 0.56 & 1.814E-06 & 1.00 & 1.384E-13 & 1.02 & 4.092E-10 & 1.00 \\ \hline
\end{tabular}
\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
for a one-dimensional \emph{cylindrical} problem with $m=22$ and
$s=10$, discretized on a grid with
$N=128$ and $1024$ intervals, using linear and cubic B-splines. For
both grid sizes, a total of 6 grid levels were considered.}
\label{tab:cylindrical1}
\end{table}
\begin{table}
\centering
\begin{tabular}{|c|c|c|c|c|}\hline
& \multicolumn{2}{c|}{ Cartesian problem}
& \multicolumn{2}{c|}{ Cylindrical problem} \\ \cline{2-5}
& $N=128$ & $N=1024$ & $N=128$ & $N=1024$\\ \hline
$\nu_1=1, \nu_2=1, \mu=1$ & 0.13 & 0.14 & 0.13 & 0.14 \\
$\nu_1=1, \nu_2=2, \mu=1$ & 0.08 & 0.08 (\emph{0.10}) & 0.08 & 0.08 (\emph{0.09}) \\
$\nu_1=2, \nu_2=1, \mu=1$ & 0.08 & 0.08 & 0.08 & 0.08 \\
$\nu_1=2, \nu_2=2, \mu=1$ & 0.04 & 0.04 (\emph{0.08}) & 0.02 & 0.03 (\emph{0.08})\\
$\nu_1=1, \nu_2=1, \mu=2$ & 0.12 & 0.11 & 0.12 & 0.11 \\ \hline
\end{tabular}
\caption{The \emph{convergence factor} (averaged over
the last 5 cycles) for different iteration parameters $\nu_1$, $\nu_2$ and
$\mu$, using the linear Splines for both Cartesian ($k=10$,
$\sigma=0$) and cylindrical ($m=22$, $s=10$) problems. The last
entry is usually called
a $W$-cycle while the first four designate a $V(\nu_1,\nu_2)$
cycle. Gauss Seidel relaxation is used except for the results enclosed
in parenthesis which are obtained with the Jacobi
(weighted with $\omega=2/3$) relaxation.}
\label{tab:improv}
\end{table}
The next experiment is shown on Table~\ref{tab:fmg}, where two FMG$(\nu_1,\nu_2)$
schemes are applied to the $m=22$, $s=10$ cylindrical problem with
grid sizes up to $N=2048$. Note that the problem is solved to the level of
discretization for $N\ge 128$ with FMG$(2,1)$ but not with FMG$(1,1)$. Solving the same
problem with the $V(2,1)$ cycle required 3 iterations for all the values of
$N$ shown. Since the cost of one FMG(2,1) is $\sim 2$ the cost
of one $V(2,1)$ (see Appendix \ref{sec:cost}), it appears that FMG is
more efficient for $N\ge 128$.
Finally, in all the cases shown here, the equality (\ref{eq:coarse_mat})
is verified numerically, except for the cylindrical case with $m\neq 0$. This is
expected since as noted earlier, the matrix elements $A_{i1}$ and
$A_{1i}$ diverge unless $m=0$ in the cylindrical problem.
\begin{table}
\centering
\begin{tabular}{|c|c|c|c|c|}\hline
& \multicolumn{2}{c|}{FMG(1,1)}
& \multicolumn{2}{c|}{FMG(2,1)} \\ \cline{2-5}
$N$ & $\|e\|_2$ & $\|e\|_2/\|e\|_d$ & $\|e\|_2$ & $\|e\|_2/\|e\|_d$ \\ \hline
4 & 1.011E-01 & 0.968 & 1.012E-01 & 0.969 \\
8 & 7.781E-02 & 1.031 & 7.679E-02 & 1.018 \\
16 & 3.332E-02 & 1.310 & 2.808E-02 & 1.104 \\
32 & 1.516E-03 & 1.421 & 1.098E-03 & 1.030 \\
64 & 5.168E-05 & 1.443 & 3.652E-05 & 1.019 \\
128 & 2.012E-06 & 1.109 & 1.818E-06 & 1.002 \\
256 & 1.125E-07 & 1.053 & 1.069E-07 & 1.001 \\
512 & 6.819E-09 & 1.037 & 6.576E-09 & 1.000 \\
1024 & 4.224E-10 & 1.032 & 4.093E-10 & 1.000 \\
2048 & 2.634E-11 & 1.031 & 2.556E-11 & 1.000 \\
\hline
\end{tabular}
\caption{The discretization errors $\|e\|_2$ obtained from a
FMG$(\nu_1,\nu_2)$ sweep with $\nu_0=1$ for different grid sizes $N$. The columns
$\|e\|_2/\|e\|_d$ display their ratio with the discretization errors
obtained from a \emph{direct} solver. The cylindrical problem with
$m=22$ and $s=10$ using cubic Splines is considered here.}
\label{tab:fmg}
\end{table}
\section{Periodic Case}
\subsection{Transfer operators}
For periodic problems, we use \emph{periodic} Splines \cite{BSPLINES} which
satisfy the periodic boundary condition
$\Lambda^h_{i+N}(x)=\Lambda^h_i(x-Nh)$. As a result, both the expansion
coefficients and the right hand sides are periodic
with periodicity $N$ ($u^h_{i+N}=u^h_i$, $b^h_{i+N}=b^h_i$) and he rank
of all matrices should be $N$ instead of $N+p$ as in the non-periodic case.
The \emph{prolongation} matrix $\mathbf{P}^h_{2h}$ as given by
(\ref{eq:prolongation}) are computed numerically and the results
for $N=8$ are given below for linear, quadratic and cubic Splines.
\begin{itemize}
\item Linear Splines
\begin{equation}
\mathbf{P}^{h}_{2h} =
\left(
\begin{matrix}
1 & 0 & 0 & 0 \\
1/2 & 1/2 & 0 & 0 \\
0 & 1 & 0 & 0 \\
0 & 1/2 & 1/2 & 0 \\
0 & 0 & 1 & 0 \\
0 & 0 & 1/2 & 1/2 \\
0 & 0 & 0 & 1 \\
1/2 & 0 & 0 & 1/2 \\
\end{matrix}\right)
\end{equation}
\item Quadratic Splines
\begin{equation}
\mathbf{P}^{h}_{2h} =
\left(
\begin{matrix}
3/4 & 1/4 & 0 & 0 \\
1/4 & 3/4 & 0 & 0 \\
0 & 3/4 & 1/4 & 0 \\
0 & 1/4 & 3/4 & 0 \\
0 & 0 & 3/4 & 1/4 \\
0 & 0 & 1/4 & 3/4 \\
1/4 & 0 & 0 & 3/4 \\
3/4 & 0 & 0 & 1/4 \\
\end{matrix}\right)
\end{equation}
\item Cubic Splines
\begin{equation}
\mathbf{P}^{h}_{2h} =
\left(
\begin{matrix}
1/2 & 1/2 & 0 & 0 \\
1/8 & 3/4 & 1/8 & 0 \\
0 & 1/2 & 1/2 & 0 \\
0 & 1/8 & 3/4 & 1/8 \\
0 & 0 & 1/2 & 1/2 \\
1/8 & 0 & 1/8 & 3/4 \\
1/2 & 0 & 0 & 1/2 \\
3/4 & 1/8 & 0 & 1/8 \\
\end{matrix}\right)
\end{equation}
\end{itemize}
The restriction matrix is simply $\mathbf{R}^{2h}_h=
(\mathbf{P}^{h}_{2h})^T$. Generalization for any other number of intervals $N$ should be
straightforward.
\subsection{Numerical Experiments}
In order to test the grid transfer operators obtained above, the same
second-order problem (\ref{eq:cartesian_problem}) but with the
periodic boundary condition $u(x+1)=u(x)$ is considered. It should be
noted that in that case, if $\sigma=0$, the problem is singular since
the solution is not \emph{unique}! But we have observed that this
problem can be avoided for a slightly non zero $\sigma$,
With $\sigma=0.01$ and $k=10$ and using linear and cubic Splines, we
recover the same multigrid iterative performances shown in Table
\ref{tab:cartesian1} obtained previously for non-periodic Dirichlet
boundary conditions. Table \ref{tab:quad_splines} also shows similar
iterative efficiencies for \emph{quadratic} non-periodic and periodic problems.
The identity (\ref{eq:coarse_mat}) is numerically verified in all the
cases considered.
\begin{table}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
\multicolumn{9}{|c|}{ Cartesian problem with quadratic splines} \\ \hline
& \multicolumn{4}{c|}{ $N=128$}
& \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio &
$\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
0 & 6.203E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\
1 & 8.114E-04 & 0.01 & 6.375E-06 & 0.01 & 1.003E-04 & 0.00 & 4.509E-07 & 0.00 \\
2 & 1.891E-05 & 0.02 & 6.079E-08 & 0.01 & 1.769E-06 & 0.02 & 8.061E-10 & 0.00 \\
3 & 1.103E-06 & 0.06 & 5.220E-08 & 0.86 & 7.018E-08 & 0.04 & 9.970E-11 & 0.12 \\
4 & 8.148E-08 & 0.07 & 5.220E-08 & 1.00 & 5.620E-09 & 0.08 & 9.958E-11 & 1.00 \\
5 & 6.368E-09 & 0.08 & 5.220E-08 & 1.00 & 4.772E-10 & 0.08 & 9.958E-11 & 1.00 \\
6 & 4.969E-10 & 0.08 & 5.220E-08 & 1.00 & 4.101E-11 & 0.09 & 9.958E-11 & 1.00 \\
7 & 3.874E-11 & 0.08 & 5.220E-08 & 1.00 & 3.548E-12 & 0.09 & 9.958E-11 & 1.00 \\
8 & 3.081E-12 & 0.08 & 5.220E-08 & 1.00 & 3.081E-13 & 0.09 & 9.958E-11 & 1.00 \\
9 & 2.489E-13 & 0.08 & 5.220E-08 & 1.00 & 2.690E-14 & 0.09 & 9.958E-11 & 1.00 \\
10 & 1.986E-14 & 0.08 & 5.220E-08 & 1.00 & 3.212E-15 & 0.12 & 9.958E-11 & 1.00 \\
\hline \hline
\multicolumn{9}{|c|}{ Periodic problem with quadratic splines} \\ \hline
& \multicolumn{4}{c|}{ $N=128$}
& \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio &
$\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
0 & 6.203E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\
1 & 1.285E-03 & 0.02 & 1.294E-05 & 0.02 & 3.116E-04 & 0.01 & 5.862E-06 & 0.01 \\
2 & 7.878E-05 & 0.06 & 6.569E-07 & 0.05 & 2.893E-05 & 0.09 & 1.626E-07 & 0.03 \\
3 & 6.573E-06 & 0.08 & 6.511E-08 & 0.10 & 2.691E-06 & 0.09 & 5.631E-09 & 0.03 \\
4 & 5.681E-07 & 0.09 & 5.224E-08 & 0.80 & 2.385E-07 & 0.09 & 2.400E-10 & 0.04 \\
5 & 4.890E-08 & 0.09 & 5.219E-08 & 1.00 & 2.097E-08 & 0.09 & 9.997E-11 & 0.42 \\
6 & 4.198E-09 & 0.09 & 5.219E-08 & 1.00 & 1.828E-09 & 0.09 & 9.958E-11 & 1.00 \\
7 & 3.607E-10 & 0.09 & 5.219E-08 & 1.00 & 1.584E-10 & 0.09 & 9.958E-11 & 1.00 \\
8 & 3.103E-11 & 0.09 & 5.219E-08 & 1.00 & 1.370E-11 & 0.09 & 9.958E-11 & 1.00 \\
9 & 2.674E-12 & 0.09 & 5.219E-08 & 1.00 & 1.184E-12 & 0.09 & 9.958E-11 & 1.00 \\
10 & 2.307E-13 & 0.09 & 5.219E-08 & 1.00 & 1.025E-13 & 0.09 & 9.958E-11 & 1.00 \\\hline
\end{tabular}
\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
for \emph{Cartesian} problem ($k=10$, $\sigma=0$) and
\emph{periodic} problem ($k=10$, $\sigma=0.01$), discretized on a grid with
$N=128$ and $1024$ intervals, using quadratic B-splines. For
both grid sizes, a total of 6 grid levels were considered.}
\label{tab:quad_splines}
\end{table}
\section{Conclusion}
Using the variational principle, we have derived the expressions of
the grid transfer matrices for Finite Elements using Splines of any
order. It is found that:
\begin{itemize}
\item The grid transfer matrices do not depend of the geometries
characterized by the Jacobian as defined in $dV = x^\alpha dx$.
\item The standard grid transfer operator used for first order finite
difference (FD) discretization for Cartesian geometry is recovered when
linear Spline finite elements (FE) are used.
\item Applying these transfer matrices, we have solved Cartesian,
cylindrical as well as periodic one dimensional
problems, and obtained essentially the same multigrid iterative performances
as found for standard first order FD Cartesian problems.
\item No performance \emph{degradation} is observed when the order of Splines FE
is increased from 1 to 3, or when the cylindrical geometry is considered.
\end{itemize}
For two dimensional problems, notice that for both Cartesian ($dV=dxdy$) and
standard curvilinear geometries ($dV=r^\alpha drd\theta$), the
Jacobian is \emph{separable}. Using this property, one can show that
the two dimensional grid transfer consists of simply applying
successively one dimensional grid transfer on each of the $x$ and
$y$ (or $r$ and $\theta$) grids. With the solution
$\mathbf{u}^h=[u^h_{ij}]$ and right hand side
$\mathbf{b}^h=[b^h_{ij}]$ defined by
\begin{equation}
u(x,y) = \sum_{ij} u^h_{ij}\Lambda^h_i(x)\Lambda^h_j(y), \qquad
b^h_{ij} = \int dx\Lambda^h_i(x)\int dy\Lambda^h_j(y)f(x,y),
\end{equation}
the two dimension grid transfers can be expressed as (see Appendix \ref{sec:twod})
\begin{equation}
\begin{split}
\mathbf{u}^h &= {_{x}\mathbf{P}^h_{2h}}\; \mathbf{u}^{2h}
\left(_{y}\mathbf{P}^h_{2h}\right)^T, \\
\mathbf{b}^{2h} &={_{x}\mathbf{R}^{2h}_{h}}\; \mathbf{b}^{h}
\left(_{y}\mathbf{R}^{2h}_{h}\right)^T. \\
\end{split}
\end{equation}
For more general curvilinear coordinates such as found in tokamak
magnetic coordinates defined by $dV=J(s,\theta)dsd\theta$, we will
assume that the grid transfer operators derived above are still
applicable. The validity of this assumption will be the object of the
next task.
\appendix
\section{Multigrid Cost Estimation}
\label{sec:cost}
Assuming that the \emph{coarsest} grid is fixed to $2$, the total
number of grid levels $L$ is given by $N/2^{L-1}=2$ or $L=\log_2(N)$, where $N$
is the number of intervals in the \emph{finest grid}. Since both
\emph{relaxation} and intergrid transfer are proportional to the
number of problem unknowns, the cost of the
$V$-cycle can be estimated as:
\begin{equation}
\begin{split}
\mbox{MG}(N) &= c\left[ (N+p)+(N/2+p) +\ldots + (N/2^{L-2}+p) \right] \\
&= c \left[ 2N-4 +(L-1)p \right] ,\\
\end{split}
\end{equation}
where $p$ is the order of Splines used for the discretization. The FMG
can then be deduced, assuming $\nu_0=1$ as
\begin{equation}
\begin{split}
\mbox{FMG}(N) &= \mbox{MG}(N) +\mbox{MG}(N/2) + \ldots + \mbox{MG}(N/2^{L-2})\\
&= c\left[ 4N-8 +(L-1)(pL/2-4) \right] .\\
\end{split}
\end{equation}
As expected a single FMG cycle (with $\nu_0=1$) costs about two $V$-cycles.
\section{Two dimensional Grid Transfer}
\label{sec:twod}
On the fine and the coarse grids, the problem solution $u(x,y)$ can be written
as:
\begin{equation*}
u(x,y) = \sum_{i'j'} u^h_{i'j'}\Lambda^h_{i'}(x)\Lambda^h_{j'}(y)
= \sum_{i'j'} u^{2h}_{i'j'}\Lambda^{2h}_{i'}(x)\Lambda^{2h}_{j'}(y).
\end{equation*}
Projecting these two expansions on the two dimensional basis
functions $\Lambda^h_{i}(x)\Lambda^h_{j}(y)$ yields
\begin{gather*}
\sum_{i'j'} u^h_{i'j'}
\underbrace{\int dx\Lambda^h_{i}(x)\Lambda^h_{i'}(x)}_{M^{h,h}_{ii'}}
\underbrace{\int dy\Lambda^h_{j}(x)\Lambda^h_{j'}(y)}_{N^{h,h}_{jj'}}
= \sum_{i'j'} u^{2h}_{i'j'}
\underbrace{\int dx\Lambda^h_{i}(x)\Lambda^{2h}_{i'}(x)}_{M^{h,2h}_{ii'}}
\underbrace{\int
dy\Lambda^h_{j}(x)\Lambda^{2h}_{j'}(y)}_{N^{h,2h}_{jj'}} \\
\Longrightarrow \quad
\mathbf{M}^{h,h}\;\mathbf{u}^h\;\left(\mathbf{N}^{h,h}\right)^T =
\mathbf{M}^{h,2h}\mathbf{u}^{2h}\left(\mathbf{N}^{h,2h}\right)^T \\
\Longrightarrow \quad
\mathbf{u}^h = \left(\mathbf{M}^{h,h}\right)^{-1}
\mathbf{M}^{h,2h}\;\mathbf{u}^{2h}\;\left[\left(\mathbf{N}^{h,h}\right)^{-1}\mathbf{N}^{h,2h}\right]^T.
\end{gather*}
The right hand side can be written on the fine and coarse grids as
\begin{equation*}
\begin{split}
b^{h}_{ij} = \int dx\Lambda^{h}_i(x)\int dy\Lambda^{h}_j(y)\;f(x,y)
= \sum_{i'j'} M^{h,h}_{ii'}f^{h}_{i'j'}N^{h,h}_{jj'} \quad\Longrightarrow\quad
\mathbf{b}^h &= \mathbf{M}^{h,h}\;\mathbf{f}^h\left(\mathbf{N}^{h,h}\right)^{T}, \\
b^{2h}_{ij} = \int dx\Lambda^{2h}_i(x)\int dy\Lambda^{2h}_j(y)\;f(x,y)
= \sum_{i'j'} M^{2h,h}_{ii'}f^{h}_{i'j'}N^{2h,h}_{jj'} \quad\Longrightarrow\quad
\mathbf{b}^{2h} &= \mathbf{M}^{2h,h}\;\mathbf{f}^h\left(\mathbf{N}^{2h,h}\right)^{T}, \\
\end{split}
\end{equation*}
where the expansion of $f(x,y)$ on the \emph{fine} mesh has been
used. Elimination of $\mathbf{f}^h$ then yields
\begin{equation*}
\mathbf{b}^{2h} =
\mathbf{M}^{2h,h}\left(\mathbf{M}^{h,h}\right)^{-1} \;
\mathbf{b}^h\;
\left[\mathbf{N}^{2h,h}\left(\mathbf{N}^{h,h}\right)^{-1} \right]^T.
\end{equation*}
\begin{thebibliography}{99}
\bibitem{SOLVERS} {\tt The Solvers in BSPLINES},
\url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf}
\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide,
\url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf}
\bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
Multigrid Tutorial, Second Edition, Siam (2000)}.
\end{thebibliography}
\end{document}
diff --git a/multigrid/docs/multigrid_2d.tex b/multigrid/docs/multigrid_2d.tex
index a373364..43be13c 100644
--- a/multigrid/docs/multigrid_2d.tex
+++ b/multigrid/docs/multigrid_2d.tex
@@ -1,425 +1,425 @@
%
% @file multigrid_2d.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
-% spclibs is free software: you can redistribute it and/or modify it under
+% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
-% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% @authors
% (in alphabetical order)
% @author Trach-Minh Tran
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
\title{Multigrid for Finite Elements using Splines.}
\author{Trach-Minh Tran, Stephan Brunner}
\date{v0.1, January 2013}
\abstract{A multigrid formulation for finite elements is
derived, using variational principles. More specifically the grid
transfer operators will be derived and tested in 2D Cartesian and
cylindrical geometry for arbitrary order B-Splines.}
\begin{document}
\maketitle
\tableofcontents
\section{The Model Problems}
\subsection{Cartesian Geometry}
The following second-order boundary value problem is considered
\begin{equation}
\label{eq:cartesian_problem}
\begin{split}
-\left[\frac{\partial^2}{\partial x^2}
+\frac{\partial^2}{\partial y^2} \right] u(x,y) &= f(x,y) \qquad 0\le x\le 1,\quad 0\le y\le 1 \\
u(0,y) = u(1,y) &= u(x,0) = u(x,1) = 0.
\end{split}
\end{equation}
By choosing
\begin{equation*}
f(x,y) = \sin (\pi k_xx + \pi k_yy),
\end{equation*}
where $k_x$ and $k_y$ are integers, the solution of the BVP is simply
\begin{equation*}
u(x,y) = \frac{\sin(\pi k_xx+\pi k_yy)}{\pi^2(k_x^2+k_y^2)}.
\end{equation*}
Using a weak formulation on Eq.(\ref{eq:cartesian_problem}) and a grid
of $N_x\times N_y$ intervals, one obtains the following discretized
linear system
\begin{equation}
\sum_{i'=1}^{N_x+p}\sum_{j'=1}^{N_y+p}A_{iji'j'}u_{i'j'} = b_{ij},
\qquad i=1,\ldots,N_x+p,\quad j=1,\ldots,N_y+p,
\end{equation}
where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion
coefficients of the solution
\begin{equation}
u(x,y) = \sum_{i=1}^{N_x+p}\sum_{j=1}^{N_y+p}u_{ij}\Lambda_i(x)\Lambda_j(y),
\end{equation}
and the matrix $A$ and right hand side $b$ are determined from
\begin{align}
A_{iji'j'} &= \int^1_0\int^1_0 dxdy
\left[\Lambda'_{i'}(x)\Lambda_{j'}(y)\Lambda'_i(x)\Lambda_j(y) +
\Lambda_{i'}(x)\Lambda'_{j'}(y)\Lambda_i(x)\Lambda'_j(y) \right], \\
b_{ij} &= \int^1_0\int^1_0 dxdy\Lambda_i(x)\Lambda_j(y)f(x,y).
\end{align}
Note that using a Gauss quadrature with $\lceil(2p+1)/2\rceil$ points
per interval to calculate the matrix $A$ would yield an exact integration.
\subsection{Cylindrical Geometry}
The following second-order boundary value problem is considered:
\begin{equation}
\label{eq:cylindrical_problem}
\begin{split}
-\left[\frac{1}{r}\frac{\partial}{\partial r}r\frac{\partial}{\partial r}
+ \frac{1}{r^2} \frac{\partial^2}{\partial\theta^2}
\right]u(r,\theta) &= f(r,\theta) \qquad 0\le r\le 1,\quad
0 \le \theta < 2\pi \\
u(1,\theta) &= 0, \\
\end{split}
\end{equation}
By choosing
\begin{equation*}
f(r,\theta) = j^2_{ms} J_{m}(j_{ms}r)\cos(m\theta),
\end{equation*}
where $m$ is an integer and $j_{ms}$, the $s^{th}$ zero of $J_{m}$,
the solution of this BVP is
\begin{equation*}
u(r,\theta) = J_{m}(j_{ms}r)\cos(m\theta).
\end{equation*}
Using a weak formulation on Eq.(\ref{eq:cylindrical_problem})and a grid
of $N_r\times N_\theta$ intervals, one obtains the following discretized
linear system
\begin{equation}
\sum_{i'=1}^{N_r+p}\sum_{j'=1}^{N_\theta}A_{iji'j'}u_{i'j'} = b_{ij},
\qquad i=1,\ldots,N_r+p,\quad j=1,\ldots,N_\theta,
\end{equation}
where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion
coefficients of the solution
\begin{equation}
u(r,\theta) = \sum_{i=1}^{N_r+p}\sum_{j=1}^{N_\theta}u_{ij}\Lambda_i(r)\Lambda_j(\theta),
\end{equation}
and the matrix $A$ and right hand side $b$ are determined from
\begin{align}
A_{iji'j'} &= \int^1_0\int^{2\pi}_0 rdrd\theta
\left[\Lambda'_{i'}(r)\Lambda_{j'}(\theta)\Lambda'_i(r)\Lambda_j(\theta) +
\frac{1}{r^2}
\Lambda_{i'}(r)\Lambda'_{j'}(\theta)\Lambda_i(r)\Lambda'_j(\theta)
\right], \\
b_{ij} &= \int^1_0\int^{2\pi}_0 rdrd\theta\Lambda_i(r)\Lambda_j(\theta)f(r,\theta).
\end{align}
Note that $A$ has an $1/r$ singularity in the
integrand. For $m\neq0$, this should not be problematic since the
converged solution behaves as $\sim r^m$ near $r=0$. The case $m=0$
will be investigated numerically latter in this report, together withe
the $m\neq 0$ case.
\section{Restriction Operator}
In the following, let us use the superscripts $h$ and $2h$ to denote
quantities defined respectively on a \emph{fine} ($N_x\times N_y$
or $N_r\times N_\theta$) and a \emph{coarser} ($N_x/2\times N_y/2$
or $N_r/2\times N_\theta/2$) grid.
The two grid transfers required in the standard \emph{multigrid}
\cite{MG1D,Briggs} are:
\begin{enumerate}
\item the \emph{restriction} of the right hand side: $\mathbf{b}^{h}
\longrightarrow \mathbf{b}^{2h}$ and
\item the \emph{prolongation} of the solution: $\mathbf{u}^{2h}
\longrightarrow \mathbf{u}^{h}$.
\end{enumerate}
Noting that the basis functions $\Lambda^{2h}_i(x)$, which are \emph{piecewise}
$C^{p-1}$ polynomials with \emph{breaks} on the \emph{coarse} grid
points $x^{2h}_k=(2h)k$ can be also considered as \emph{piecewise}
$C^{p-1}$ polynomials with \emph{breaks} on the \emph{fine} grid
$x^h_k=kh$, they can be expressed \emph{uniquely} as a linear
combination of the \emph{fine} grid basis functions:
\begin{equation}
\label{eq:basis_transf}
\Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}c_{ii'}\Lambda^h_{i'}(x), \quad
i=1,\ldots,N/2+p.
\end{equation}
The (rectangular) matrix $c_{ii'}$ can be identified as the
one-dimensional \emph{restriction} $\mathbf{R}$ since
\begin{equation*}
b^{2h}_i = \int_0^1 dx f(x)\Lambda^{2h}_i(x) =
\sum^{N+p}_{i'=1}c_{ii'}\;b^h_{i'} =
\sum^{N+p}_{i'=1}R_{ii'}\;b^h_{i'}.
\end{equation*}
It can be computed by simply projecting Eq.(\ref{eq:basis_transf})
on the fine grid basis function $\Lambda^h_{j}(x)$ \cite{MG1D}:
\begin{equation}
\sum^{N+p}_{i'=1}R_{ii'}\underbrace{\int_0^1 dx
\Lambda^h_{i'}(x)\Lambda^h_{j}(x)}_{M^h_{i'j}} = \underbrace{\int_0^1 dx
\Lambda^{2h}_i(x)\Lambda^h_{j}(x)}_{M^{2h,h}_{i'j}} \Longrightarrow
\mathbf{R}=\mathbf{M}^{2h,h}\cdot(\mathbf{M}^{h})^{-1}.
\end{equation}
It should be stressed that the representation for $\Lambda^{2h}_i(x)$
in Eq.(\ref{eq:basis_transf}) is \emph {unique}. This is
checked by verifying that the same matrix $R_{ii'}$ is obtained using
for example the \emph{collocation} methods. One such method, which is used for
this check is detailed in Appendix \ref{sec:colloc}. The calculated
grid transfer matrices for linear, quadratic and cubic periodic and
non-periodic Splines are given in \cite{MG1D}.
Denoting the restriction on $x$ and $y$ respectively
by $\mathbf{R}^x$ and $\mathbf{R}^y$, the two-dimensional restriction of $b^h_{ij}$ is
defined as
\begin{equation*}
b^{2h}_{ij} = \int_0^1\int_0^1 dxdy f(x,y)\Lambda^{2h}_i(x)\Lambda^{2h}_j(y) =
\sum^{N+p}_{i'=1}\sum^{N+p}_{j'=1}R^x_{ii'}R^y_{jj'}b^h_{i'j'},
\end{equation*}
and thus
\begin{equation}
\label{eq:restriction}
\boxed{\mathbf{b}^{2h} = \mathbf{R}^x \cdot \mathbf{b}^{h}
\cdot (\mathbf{R}^y)^T.}
\end{equation}
\section{Prolongation Operator}
Using Eq.(\ref{eq:basis_transf}) (with $c_{ii'}=R_{ii'}$), the
solution at the coarse grid can be expressed as
\begin{equation*}
u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x) =
\sum_{i'=1}^{N+p}\left[\sum_{i=1}^{N/2+p} R_{ii'}u^{2h}_{i}\right]
\Lambda^h_{i'}(x) =
\sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} (R)^T_{i'i}u^{2h}_{i}\right]}_{\tilde{u}^h_{i'}}
\Lambda^h_{i'}(x),
\end{equation*}
from which one obvious choice for the \emph{prolongation} operator would be
\begin{equation}
\mathbf{P} = \mathbf{R}^T = (\mathbf{M}^{h})^{-1}\cdot\mathbf{M}^{h,2h}.
\end{equation}
Generalization to a two-dimensional prolongation is obtained as
follows, where summation over repeated indices is assumed:
\begin{equation*}
u^{2h}(x,y) = u^{2h}_{ij}\Lambda^{2h}_{i}(x)\Lambda^{2h}_{j}(y) =
\left[R^x_{ii'}u^{2h}_{ij}R^y_{jj'}\right]\Lambda^{h}_{i'}(x)\Lambda^{h}_{j'}(y)
\end{equation*}
which leads to the prolonged solution $\tilde{\mathbf{u}}^{h}$ given by
\begin{equation}
\label{eq:prolongation}
\boxed{\tilde{\mathbf{u}}^{h} = \mathbf{P}^x \cdot \mathbf{u}^{2h} \cdot
(\mathbf{P}^y)^T .}
\end{equation}
It should be noted here that, while the restricted right hand side
$\mathbf{b}^{2h}$ as defined in Eq.(\ref{eq:restriction}) is
\emph{exactly identical} to the assembled right hand side, the
prolonged solution $\tilde{\mathbf{u}}^{h}$ defined in
Eq.(\ref{eq:prolongation}) is just a
representation of $u^{2h}(x,y)$ on the fine mesh and \emph{not} the
solution $u^h(x,y)$ which can only be obtained by solving the problem
on the fine mesh!
\section{Numerical Experiments}
The multigrid performance can be characterized by looking at the
convergence of the residual Euclidean norm for the linear system
$\mathbf{A}\mathbf{u}=\mathbf{b}$:
\begin{equation}
\label{eq:resid}
\|\mathbf{r}\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2.
\end{equation}
When the \emph{exact} solution $u(x,y)$ is known, the \emph{discretization error}
can defined as
\begin{equation}
\label{eq:discerr}
\|e\|_2 = \sqrt{\int dV\left[\sum_{ij}u_{ij}\Lambda_{ij}(x,y)-u(x,y)\right]^2}
\end{equation}
and computed using a Gauss quadrature. Note that for Splines of order
$p$, $\|e(x,y)\|_2(h)$ converges to zero as $O(h^{p+1})$.
\subsection{Cartesian Geometry}
The multigrid performances for varying problem sizes are displayed in
Fig.(\ref{fig:linear_mg2d}) for linear Splines and
Fig.(\ref{fig:cubic_mg2d}) for cubic Splines. They show that the number
of iterations required for convergence (abount 3 for both linear and cubic
Splines) is insensitive to the problem sizes. Compared to direct
methods, the multigrid should scale much better for large problem
sizes, as indicated in Table~\ref{tab:comparison1}. For this model
problem, using cubic Splines seems to converge
slightly faster than linear Splines!
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{linear_mg2d}
\caption{Performance of the multigrid $V(2,1)$ scheme using a
Gauss-Seidel relaxation and \emph{linear Splines} for different
problem sizes. The size of the \emph{coarsest} grid is $2\times 2$.}
\label{fig:linear_mg2d}
\end{figure}
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{cubic_mg2d}
\caption{Performance of the multigrid $V(2,1)$ scheme using a
Gauss-Seidel relaxation and \emph{cubic Splines} for different
problem sizes. The size of the \emph{coarsest} grid is $2\times 2$.}
\label{fig:cubic_mg2d}
\end{figure}
\begin{table}[htb]
\centering
\begin{tabular}{|c|c|c|c|c|c|}\hline
& \multicolumn{2}{c|}{Linear Splines}
& \multicolumn{2}{c|}{Cubic Splines} \\ \hline
$N$ & $V(2,1)$ & Direct & $V(2,1)$ & Direct \\ \hline
16 & 8.844E-04 & 2.051E-03 & 2.653E-03 & 3.970E-03 \\
32 & 1.661E-03 & 5.345E-03 & 4.983E-03 & 1.540E-02 \\
64 & 5.766E-03 & 2.054E-02 & 1.730E-02 & 7.492E-02 \\
128 & 2.347E-02 & 3.288E-01 & 7.042E-02 & 1.060E+00 \\
\hline
\end{tabular}
\caption{Times (in seconds) used by a the \emph{direct sparse} solver MUMPS-4.10.0
for different problem sizes versus the times used by \emph{three}
multigrid $V(2,1)$ cycles. The Intel Fortran-13.0 compiler is used on an Intel
i7 platform.}
\label{tab:comparison1}
\end{table}
The effects of the relaxation parameters $\nu_1,\nu_2$ on the
multigrid performnace (Fig.(\ref{fig:cubic_mg2d_relax})) indicates
that only a few relaxations are sufficient to achieve a good multigrid
performance. Further analysis of the computational cost is required
however to determine the \emph{optimal} $\nu_1,\nu_2$.
Finally, the effects of the number of grid levels are analyzed in
Fig.(\ref{fig:cubic_mg2d_levels}). In addition to the computational
cost (see Table~\ref{tab:comparison2}), the memory required for the
\emph{direct solver} at the coarsest grid level should be taken into
account for the choice of the optimal number of grid levels,
especially for very large problems.
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{cubic_mg2d_relax}
\caption{Effect of the number of the relaxation sweeps $\nu_1,\nu_2$
on the performance of the multigrid $V(\nu_1,\nu_2)$-cycle for
\emph{Cubic Splines}. The finest grid has $128\times 128$
intervals.}
\label{fig:cubic_mg2d_relax}
\end{figure}
\begin{figure}
\centering
\includegraphics[angle=0,width=\hsize]{cubic_mg2d_levels}
\caption{Effect of the number grid levels
on the performance of the multigrid $V(2,1)$-cycle for \emph{Cubic
Splines}. The finest grid has $128\times 128$ intervals.}
\label{fig:cubic_mg2d_levels}
\end{figure}
\begin{table}[htb]
\centering
\begin{tabular}{|c|c|c|c|}\hline
Number of levels & $V(1,0)$ & $V(1,1)$ & $V(2,1)$ \\ \hline
2 & 3.386E-02 & 3.881E-02 & 4.031E-02 \\
3 & 2.923E-02 & 3.398E-02 & 3.605E-02 \\
4 & 2.880E-02 & 3.275E-02 & 3.595E-02 \\
7 & 2.912E-02 & 3.236E-02 & 3.566E-02 \\
\hline
\end{tabular}
\caption{Effects of the times in seconds used per $V$-cyclefor
different number of grid levels and relaxation paramters for a
$128\times 128$ problem. The Intel Fortran-13.0 compiler is used on an Intel
i7 platform.}
\label{tab:comparison2}
\end{table}
\subsection{Cylindrical Geometry}
\newpage
\appendix
\section{Grid transfer matrix by collocation}
\label{sec:colloc}
Let first consider the \emph{periodic case}. Denoting $N$ as the
number of intervals of the fine grid, the \emph{periodic} Spline basis
functions on the \emph{coarse} grid $\Lambda^{2h}_i$ can be expressed
as linear combinations of the \emph{fine} grid Spline basis functions
as:
\begin{equation}
\Lambda^{2h}_i(x) = \sum^{N}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad
i=1,\ldots,N/2.
\end{equation}
For any given $i$, the coefficients $R_{ii'}$ can be calculated by
expressing the relation above on exactly $N$ points on the $x$-grid. For
\emph{odd} Spline order $p$, these \emph{collocation} (or
interpolating) points can be chosen as the \emph{break} points of the fine
grid $x^h_k,\quad k=0,\ldots,N-1$. For \emph{even} values of $p$, the
collocation points should be $x^h_{k+1/2}=(x^h_{k}+x^h_{k+1})/2$ in
order to obtain a non-singular linear system of equations
\cite{BSPLINES}. The resulting system of equations to solve for
$R_{ii'}$ are given below:
\begin{equation}
\begin{split}
p\mbox{ odd}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_k)\,R_{ii'} =
\Lambda^{2h}_i(x^h_k), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2, \\
p\mbox{ even}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_{k+1/2})\,R_{ii'} =
\Lambda^{2h}_i(x^h_{k+1/2}), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2.\\
\end{split}
\end{equation}
For \emph{non-periodic} Splines, there are $N+p$ and
$N/2+p$ basis functions respectively on the fine and coarse grid:
\begin{equation}
\Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad
i=1,\ldots,N/2+p.
\end{equation}
This implies that for any given $\Lambda^{2h}_i$, $N+p$ conditions
are required to determined the $N+p$ terms of row $i$ of the matrix $R_{ii'}$. For odd $p$,
$N+1$ collocation points $x_k,\quad k=0,\ldots,N$ can be used with the
missing $p-1$ equations obtained by expressing all the $(p-1)/2$ derivatives
of $\Lambda^{2h}_i(x)$ at the end points $x_0$ and $x_N$:
\begin{equation}
\frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) =
\sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x)
, \quad \alpha=1,\dots,\frac{p-1}{2} \quad (\mbox{$p$ odd}).
\end{equation}
For \emph{even} $p$, in addition to the $N$ relations obtained with
the collocation points $x_{k+1/2}$ (as in the \emph{periodic} case),
the missing $p$ conditions can be obtained by expressing $\Lambda^{2h}_i$
and its derivatives up to $p/2-1$ at the end points $x_0$ and $x_N$:
\begin{equation}
\frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) =
\sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x)
, \quad \alpha=0,\dots,\frac{p}{2}-1 \quad (\mbox{$p$ even}).
\end{equation}
\begin{thebibliography}{99}
\bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\
\url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf}
\bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
Multigrid Tutorial, Second Edition, Siam (2000)}.
\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide,
\url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf}
\bibitem{SOLVERS} {\tt The Solvers in BSPLINES},
\url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf}
\end{thebibliography}
\end{document}
diff --git a/multigrid/src/CMakeLists.txt b/multigrid/src/CMakeLists.txt
index 888ffc3..7530640 100644
--- a/multigrid/src/CMakeLists.txt
+++ b/multigrid/src/CMakeLists.txt
@@ -1,92 +1,92 @@
#
# @file CMakeLists.txt
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
# @author Trach-Minh Tran
#
project(multigrid_tests)
if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
# set(CMAKE_Fortran_FLAGS_RELEASE
# "${CMAKE_Fortran_FLAGS_RELEASE} -profile-functions -profile-loops=outer"
# )
set(CMAKE_Fortran_FLAGS_DEBUG
"${CMAKE_Fortran_FLAGS_DEBUG} -fpe0"
)
endif()
set(MG_TESTS
transfer1d
test_relax
test_mg
test_mgp
test_csr
two_grid
test_mg2d
test_relax2d
test_transf2d
transfer1d_col
test_relax2d_cyl
test_transf2d_cyl
test_mg2d_cyl
)
foreach(test ${MG_TESTS})
add_executable(${test} ${test}.f90)
target_link_libraries(${test} bsplines)
endforeach()
add_executable(poisson_fd poisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
target_link_libraries(poisson_fd bsplines)
set(TESTS ${TESTS} poisson_mg)
add_executable(partition partition.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
target_link_libraries(partition bsplines)
# Fail to compile with crayftn 4.0.46 on ROSA/DAINT
add_executable(test_stencil test_stencil.f90 stencil_mod.f90 gvector_mod.f90)
target_link_libraries(test_stencil bsplines)
add_executable(test_stencilg test_stencilg.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
target_link_libraries(test_stencilg bsplines)
# Fail to compile with crayftn 4.0.46 on ROSA/DAINT
add_executable(test_jacobi test_jacobi.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
target_link_libraries(test_jacobi bsplines)
add_executable(test_jacobig test_jacobig.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
target_link_libraries(test_jacobig bsplines)
add_executable(ppoisson_fd ppoisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
target_link_libraries(ppoisson_fd bsplines)
add_executable(test_gvec1d test_gvec1d.f90)
target_link_libraries(test_gvec1d bsplines)
add_executable(test_intergrid0 test_intergrid0.f90)
target_link_libraries(test_intergrid0 bsplines)
add_executable(test_intergrid1 test_intergrid1.f90 parmg_mod.f90 gvector_mod.f90 stencil_mod.f90)
target_link_libraries(test_intergrid1 bsplines)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
diff --git a/multigrid/src/Makefile b/multigrid/src/Makefile
index 5748f81..235bb03 100644
--- a/multigrid/src/Makefile
+++ b/multigrid/src/Makefile
@@ -1,136 +1,136 @@
#
# @file Makefile
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
MPIF90 = mpif90
LD = $(MPIF90)
# F90FLAGS = -I$(HOME)/include/O -I$(PPUTILS2)
# LDFLAGS = -L$(HOME)/lib/O -L${HDF5}/lib
F90FLAGS = -I$(FUTILS)/include -I$(BSPLINES)/include
LDFLAGS = -mkl=cluster -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib
MODS = gvector_mod.o stencil_mod.o
LIBS = $(MODS) -lbsplines -lpppack -lpputils2 -lfutils \
-lhdf5_fortran -lhdf5 -lz
ifdef MKL
SPBLAS = -DMKL
endif
ifdef MUMPS
F90FLAGS += -I$(MUMPS)/include
LDFLAGS += -L$(MUMPS)/lib
LIBS += $(MUMPSLIBS)
endif
all: transfer1d test_relax test_mg test_mgp test_csr two_grid \
test_mg2d test_relax2d test_transf2d transfer1d_col \
test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl poisson_fd
.SUFFIXES:
.SUFFIXES: .o .f90
.f90.o:
$(MPIF90) $(F90FLAGS) -c $<
partition: partition.o
$(LD) $(LDFLAGS) -o $@ $< parmg_mod.o $(MODS) -lpputils2 -lfutils \
-lhdf5_fortran -lhdf5 -lz
transfer1d: transfer1d.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_relax: test_relax.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_mg: test_mg.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_mgp: test_mgp.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_csr: test_csr.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
two_grid: two_grid.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_mg2d: test_mg2d.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_relax2d: test_relax2d.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_transf2d: test_transf2d.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
transfer1d_col: transfer1d_col.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_relax2d_cyl: test_relax2d_cyl.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_transf2d_cyl: test_transf2d_cyl.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
test_mg2d_cyl: test_mg2d_cyl.o
$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
poisson_fd: poisson_fd.o
$(LD) $(LDFLAGS) -o $@ $< fdmat_mod.o $(LIBS)
csr_mod.o: csr_mod.f90
$(MPIF90) $(FPP) $(SPBLAS) $(F90FLAGS) -c csr_mod.f90
parmg_mod.o: gvector_mod.o stencil_mod.o
partition.o: parmg_mod.o
transfer1d.o: $(MODS)
test_relax.o: $(MODS)
test_mg.o: $(MODS)
test_mgp.o: $(MODS)
test_csr.o: $(MODS)
two_grid.o: $(MODS)
test_mg2d.o: $(MODS)
test_relax2d.o: $(MODS)
test_transf2d.o: $(MODS)
transfer1d_col.o: $(MODS)
test_relax2d_cyl.o: $(MODS)
test_transf2d_cyl.o: $(MODS)
test_mg2d_cyl.o: $(MODS)
poisson_fd.o: fdmat_mod.o
fdmat_mod.o: stencil_mod.o parmg_mod.o
stencil_mod.o: gvector_mod.o
parmg_mod.o: gvector_mod.o stencil_mod.o
clean:
rm -f *.o *.mod *~ ../wk/*~ a.out lib
distclean: clean
rm -f ../wk/*.h5 ../wk/fort.* *.eps \
transfer1d test_relax test_mg test_mgp test_csr two_grid \
test_mg2d test_relax2d test_transf2d transfer1d_col \
test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl
#include $(HOST).mk
diff --git a/multigrid/src/README_mod.txt b/multigrid/src/README_mod.txt
index 224e499..22b365f 100644
--- a/multigrid/src/README_mod.txt
+++ b/multigrid/src/README_mod.txt
@@ -1,91 +1,91 @@
/**
* @file README_mod.txt
*
* @brief
*
* @copyright
* Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
* SPC (Swiss Plasma Center)
*
- * spclibs is free software: you can redistribute it and/or modify it under
+ * SPClibs is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
- * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
* @authors
* (in alphabetical order)
* @author Trach-Minh Tran
*/
1) Module parmg
============
- types:
grid2_type
INTEGER, DIMENSION(2) :: s, e, s0, e0, npt_loc, npt
REAL(rkind), ALLOCATABLE :: x(:), y(:)
TYPE(gvector_2d) :: f, v
TYPE(stencil_2d) :: fdmat, restrict_mat
- module procedures:
creat_grid
coarse (1d, 2d)
exchange (gvector)
prolong (gvector)
restrict (gvector)
jacobi
get_resids
init_restrict (gvector)
disp (0d, 1d array of int)
get_lmax
- Uses
gvector
stencil
2) Module stencil
==============
- types:
LOGICAL :: nluni
INTEGER, DIMENSION(2) :: ldim, gdim, s0, e0, s, e
INTEGER :: npoints
INTEGER, ALLOCATABLE :: id(:,:)
REAL(rkind), ALLOCATABLE :: val(:,:,:)
- module procedures:
init
vmx
laplacian
putmat
- operators:
*: vmx
- Uses
gvector
3) Module gvector
==============
- types:
gvector_2d
INTEGER, DIMENSION(2) :: s, e ! vector internal bounds
INTEGER, DIMENSION(2) :: g ! ghost cell widths
REAL(rkind), ALLOCATABLE :: val(:,:)
- module procedures:
constructor (gvector_2d)
disp
norm2 (serial, mpi)
- operators:
+ : add_scal, add_vec
- : minus_vec, substract_vec
* : scale_left, scale_right
- assignment:
= : from_scal, from_vec
diff --git a/multigrid/src/fdmat_mod.f90 b/multigrid/src/fdmat_mod.f90
index c925169..2df2be5 100644
--- a/multigrid/src/fdmat_mod.f90
+++ b/multigrid/src/fdmat_mod.f90
@@ -1,600 +1,600 @@
!>
!> @file fdmat_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE fdmat_mod
!
USE multigrid
IMPLICIT NONE
!
INTERFACE fdmat
MODULE PROCEDURE fdmat_stencil
MODULE PROCEDURE fdmat_gen, fdmat_csr, fdmat_cds
END INTERFACE fdmat
INTERFACE ibc_fdmat
MODULE PROCEDURE ibc_fdmat_stencil
MODULE PROCEDURE ibc_fdmat_gen, ibc_fdmat_csr, ibc_fdmat_cds
END INTERFACE ibc_fdmat
INTERFACE ibc_rhs
MODULE PROCEDURE ibc_rhs_g
END INTERFACE ibc_rhs
!
CONTAINS
!--------------------------------------------------------------------------------
SUBROUTINE fdmat_stencil(grid, fdense, icrosst, mat)
!
! Construct model GBS FD partitioned matrix
!
USE iso_fortran_env, ONLY : rkind => real64
USE stencil, ONLY : stencil_2d
USE parmg, ONLY : grid2_type
!
TYPE(grid2_type) :: grid
REAL(rkind), INTENT(in) :: icrosst
TYPE(stencil_2d) :: mat
INTERFACE
FUNCTION fdense(x)
USE iso_fortran_env, ONLY : rkind => real64
REAL(rkind), INTENT(in) :: x(:)
REAL(rkind) :: fdense(SIZE(x))
END FUNCTION fdense
END INTERFACE
!
REAL(rkind) :: dx, dy
REAL(rkind),ALLOCATABLE :: dense(:)
REAL(rkind) :: stencil_arr(-1:1,-1:1), zdiag(-1:1,-1:1), corr
INTEGER :: nx, ny, i, j, k, d(2)
!
! Grid properties
!
nx = grid%npt(1)-1
ny = grid%npt(2)-1
dx = grid%x(1)-grid%x(0) ! Assume equidistant grid
dy = grid%y(1)-grid%y(0)
ALLOCATE(dense(0:nx)) ! electron densoty vary only along x
!
! Stencil array
!
stencil_arr = 0.0d0
zdiag = 0.0d0
corr = 1.d0+icrosst**2/4.0d0
stencil_arr(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr
stencil_arr(-1,0) = 1.0/dx/dx
stencil_arr(1,0) = 1.0/dx/dx
stencil_arr(0,-1) = 1.0/dy/dy*corr
stencil_arr(0,1) = 1.0/dy/dy*corr
stencil_arr(-1,-1) = icrosst*1.0/4.0/dx/dy
stencil_arr(1,-1) = icrosst*(-1.0/4.0/dx/dy)
stencil_arr(-1,1) = icrosst*(-1.0/4.0/dx/dy)
stencil_arr(1,1) = icrosst*(1.0/4.0/dx/dy)
zdiag(0,0) = 1.0d0
dense(:) = fdense(grid%x(:))
!
! Assemble the stencil by scanning local grid points
!
DO k=0,mat%npoints-1
d(:) = mat%id(k,:)
DO j=mat%s(2),mat%e(2)
DO i=mat%s(1),mat%e(1)
mat%val(i,j,k) = stencil_arr(d(1),d(2)) + &
& zdiag(d(1),d(2))*dense(i)
END DO
END DO
END DO
!
DEALLOCATE(dense)
END SUBROUTINE fdmat_stencil
!--------------------------------------------------------------------------------
SUBROUTINE ibc_rhs_g(f, s0, e0, prb)
!
! Impose BC on rhs
!
USE iso_fortran_env, ONLY : rkind => real64
USE gvector, ONLY : gvector_2d
!
TYPE(gvector_2d), INTENT(inout) :: f
CHARACTER(len=*), INTENT(in) :: prb
INTEGER, INTENT(in) :: s0(2), e0(2)
INTEGER :: s(2), e(2)
!
s = f%s
e = f%e
!
IF(s(1).EQ.s0(1)) THEN
IF(prb(1:1).EQ.'d') THEN ! West face
f%val(s(1),s(2):e(2)) = 0.0_rkind
ELSE
f%val(s(1),s(2):e(2)) = 0.5_rkind*f%val(s(1),s(2):e(2))
END IF
END IF
IF(e(1).EQ.e0(1)) THEN
IF(prb(2:2).EQ.'d') THEN ! East face
f%val(e(1),s(2):e(2)) = 0.0_rkind
ELSE
f%val(e(1),s(2):e(2)) = 0.5_rkind*f%val(e(1),s(2):e(2))
END IF
END IF
IF(s(2).EQ.s0(2)) THEN
IF(prb(3:3).EQ.'d') THEN ! South face
f%val(s(1):e(1),s(2)) = 0.0_rkind
ELSE
f%val(s(1):e(1),s(2)) = 0.5_rkind*f%val(s(1):e(1),s(2))
END IF
END IF
IF(e(2).EQ.e0(2)) THEN
IF(prb(4:4).EQ.'d') THEN ! North face
f%val(s(1):e(1),e(2)) = 0.0_rkind
ELSE
f%val(s(1):e(1),e(2)) = 0.5_rkind*f%val(s(1):e(1),e(2))
END IF
END IF
END SUBROUTINE ibc_rhs_g
!--------------------------------------------------------------------------------
SUBROUTINE ibc_fdmat_stencil(mat, prb)
!
! Impose BC on matrix
!
USE iso_fortran_env, ONLY : rkind => real64
USE stencil, ONLY : stencil_2d
!
TYPE(stencil_2d), INTENT(inout) :: mat
CHARACTER(len=*), INTENT(in) :: prb
!
INTEGER :: s0(2), e0(2), s(2), e(2)
!
s0 = mat%s0
e0 = mat%e0
s = mat%s
e = mat%e
!
! Neumann BC
! WARNING: Divide the stencil by 2 => should do the same for RHS!
!
! N
! 6---7---8
! | | |
! W 4---0---5 E Numbering of stencil
! | | |
! 1---2---3
! S
!
IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'n') THEN ! West face
mat%val(s(1),s(2):e(2),1) = 0.0_rkind
mat%val(s(1),s(2):e(2),3) = 0.0_rkind
mat%val(s(1),s(2):e(2),4) = 0.0_rkind
mat%val(s(1),s(2):e(2),5) = 2.0d0*mat%val(s(1),s(2):e(2),5)
mat%val(s(1),s(2):e(2),6) = 0.0_rkind
mat%val(s(1),s(2):e(2),8) = 0.0_rkind
mat%val(s(1),s(2):e(2),:) = 0.5_rkind*mat%val(s(1),s(2):e(2),:)
END IF
IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'n') THEN ! East face
mat%val(e(1),s(2):e(2),1) = 0.0_rkind
mat%val(e(1),s(2):e(2),3) = 0.0_rkind
mat%val(e(1),s(2):e(2),4) = 2.0d0*mat%val(e(1),s(2):e(2),4)
mat%val(e(1),s(2):e(2),5) = 0.0_rkind
mat%val(e(1),s(2):e(2),6) = 0.0_rkind
mat%val(e(1),s(2):e(2),8) = 0.0_rkind
mat%val(e(1),s(2):e(2),:) = 0.5_rkind*mat%val(e(1),s(2):e(2),:)
END IF
IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'n') THEN ! South face
mat%val(s(1):e(1),s(2),1) = 0.0_rkind
mat%val(s(1):e(1),s(2),2) = 0.0_rkind
mat%val(s(1):e(1),s(2),3) = 0.0_rkind
mat%val(s(1):e(1),s(2),6) = 0.0_rkind
mat%val(s(1):e(1),s(2),7) = 2.0d0*mat%val(s(1):e(1),s(2),7)
mat%val(s(1):e(1),s(2),8) = 0.0_rkind
mat%val(s(1):e(1),s(2),:) = 0.5_rkind*mat%val(s(1):e(1),s(2),:)
END IF
IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'n') THEN ! North face
mat%val(s(1):e(1),e(2),1) = 0.0_rkind
mat%val(s(1):e(1),e(2),2) = 2.0d0*mat%val(s(1):e(1),e(2),2)
mat%val(s(1):e(1),e(2),3) = 0.0_rkind
mat%val(s(1):e(1),e(2),6) = 0.0_rkind
mat%val(s(1):e(1),e(2),7) = 0.0_rkind
mat%val(s(1):e(1),e(2),8) = 0.0_rkind
mat%val(s(1):e(1),e(2),:) = 0.5_rkind*mat%val(s(1):e(1),e(2),:)
END IF
!
! Dirichlet BC
!
IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'d') THEN ! West face
mat%val(s(1),s(2):e(2),:) = 0.0_rkind
mat%val(s(1),s(2):e(2),0) = 1.0_rkind
END IF
IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'d') THEN ! East face
mat%val(e(1),s(2):e(2),:) = 0.0_rkind
mat%val(e(1),s(2):e(2),0) = 1.0_rkind
END IF
IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'d') THEN ! South face
mat%val(s(1):e(1),s(2),:) = 0.0_rkind
mat%val(s(1):e(1),s(2),0) = 1.0_rkind
END IF
IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'d') THEN ! North face
mat%val(s(1):e(1),e(2),:) = 0.0_rkind
mat%val(s(1):e(1),e(2),0) = 1.0_rkind
END IF
END SUBROUTINE ibc_fdmat_stencil
!--------------------------------------------------------------------------------
SUBROUTINE fdmat_gen(grid, fdense, icrosst, noinit)
!
! Generic version
!
TYPE(grid2d), INTENT(inout) :: grid
DOUBLE PRECISION, INTENT(in) :: icrosst
LOGICAL, INTENT(in), OPTIONAL :: noinit
INTERFACE
FUNCTION fdense(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: fdense(SIZE(x))
END FUNCTION fdense
END INTERFACE
!
IF(ALLOCATED(grid%mata)) THEN
CALL fdmat_csr(grid, fdense, icrosst, grid%mata, noinit)
ELSE
CALL fdmat_cds(grid, fdense, icrosst, grid%mata_cds, noinit)
END IF
END SUBROUTINE fdmat_gen
!--------------------------------------------------------------------------------
SUBROUTINE fdmat_cds(grid, fdense, icrosst, mat, noinit)
!
! Construct FD matrix
!
TYPE(grid2d), INTENT(in) :: grid
DOUBLE PRECISION, INTENT(in) :: icrosst
TYPE(cds_mat), INTENT(inout) :: mat
LOGICAL, INTENT(in), OPTIONAL :: noinit
INTERFACE
FUNCTION fdense(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: fdense(SIZE(x))
END FUNCTION fdense
END INTERFACE
!
INTEGER :: n, nx, ny
INTEGER :: kl, ku, k
INTEGER :: ix, ix2, jx, iy, jy, iy2, irow
DOUBLE PRECISION :: lx, ly, dx, dy, mele
DOUBLE PRECISION :: dense(0:grid%n(1))
DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1)
DOUBLE PRECISION :: corr
LOGICAL :: run_init
INTEGER, ALLOCATABLE :: dists(:)
!--------------------------------------------------------------------------------
run_init = .TRUE.
IF(PRESENT(noinit)) run_init = .NOT.noinit
!
! Grid properties
!
nx = grid%n(1)
ny = grid%n(2)
dx = grid%x(1) - grid%x(0)
dy = grid%y(1) - grid%y(0)
lx = grid%x(nx)
ly = grid%y(ny)
n = PRODUCT(grid%rank) ! Rank of matrix
!
! Stencil
!
stencil = 0.0d0
zdiag = 0.0d0
!
corr = 1.d0+icrosst**2/4.0d0
stencil(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr
stencil(-1,0) = 1.0/dx/dx
stencil(1,0) = 1.0/dx/dx
stencil(0,-1) = 1.0/dy/dy*corr
stencil(0,1) = 1.0/dy/dy*corr
stencil(-1,-1) = icrosst*1.0/4.0/dx/dy
stencil(1,-1) = icrosst*(-1.0/4.0/dx/dy)
stencil(-1,1) = icrosst*(-1.0/4.0/dx/dy)
stencil(1,1) = icrosst*(1.0/4.0/dx/dy)
zdiag(0,0) = 1.0d0
!
! 9-point stencil "diagonal storage"
!
kl=4
ku=4
ALLOCATE(dists(-kl:ku))
DO iy2=-1,1
DO ix2=-1,1
k=3*iy2+ix2
dists(k) = iy2*(nx+1) + ix2
END DO
END DO
!
IF(run_init) THEN
CALL init(n, dists, 1, mat)
END IF
!
! Assemble matrix by scanning all grid points
!
dense(:) = fdense(grid%x(:))
DO iy=0,ny
DO ix=0,nx
irow = iy*(nx+1)+ix+1
DO iy2=-1,1
jy=iy+iy2
IF(jy.GE.0 .AND. jy.LE.ny) THEN
DO ix2=-1,1
jx=ix+ix2
IF(jx.GE.0 .AND.jx.LE.nx) THEN
mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix)
k=3*iy2+ix2
mat%val(irow,k) = mele
END IF
END DO
END IF
END DO
END DO
END DO
!
DEALLOCATE(dists)
END SUBROUTINE fdmat_cds
!--------------------------------------------------------------------------------
SUBROUTINE fdmat_csr(grid, fdense, icrosst, mat, noinit)
!
! Construct FD matrix
!
TYPE(grid2d), INTENT(in) :: grid
DOUBLE PRECISION, INTENT(in) :: icrosst
TYPE(csr_mat), INTENT(inout) :: mat
LOGICAL, INTENT(in), OPTIONAL :: noinit
INTERFACE
FUNCTION fdense(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: fdense(SIZE(x))
END FUNCTION fdense
END INTERFACE
!
INTEGER :: n, nx, ny
INTEGER :: ix, ix2, jx, iy, jy, iy2, irow, icol
DOUBLE PRECISION :: lx, ly, dx, dy, mele
DOUBLE PRECISION :: dense(0:grid%n(1))
DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1)
DOUBLE PRECISION :: corr
LOGICAL :: run_init
!--------------------------------------------------------------------------------
run_init = .TRUE.
IF(PRESENT(noinit)) run_init = .NOT.noinit
!
! Grid properties
!
nx = grid%n(1)
ny = grid%n(2)
dx = grid%x(1) - grid%x(0)
dy = grid%y(1) - grid%y(0)
lx = grid%x(nx)
ly = grid%y(ny)
n = PRODUCT(grid%rank) ! Rank of matrix
!
! Stencil
!
stencil = 0.0d0
zdiag = 0.0d0
!
corr = 1.d0+icrosst**2/4.0d0
stencil(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr
stencil(-1,0) = 1.0/dx/dx
stencil(1,0) = 1.0/dx/dx
stencil(0,-1) = 1.0/dy/dy*corr
stencil(0,1) = 1.0/dy/dy*corr
stencil(-1,-1) = icrosst*1.0/4.0/dx/dy
stencil(1,-1) = icrosst*(-1.0/4.0/dx/dy)
stencil(-1,1) = icrosst*(-1.0/4.0/dx/dy)
stencil(1,1) = icrosst*(1.0/4.0/dx/dy)
zdiag(0,0) = 1.0d0
!
! Create CSR matrix
IF(run_init) THEN
CALL init(n, 1, mat)
END IF
!
! Assemble matrix by scanning all grid points
!
dense(:) = fdense(grid%x(:))
DO iy=0,ny
DO ix=0,nx
irow = numb(ix,iy)
DO iy2=-1,1
jy=iy+iy2
IF(jy.GE.0 .AND. jy.LE.ny) THEN
DO ix2=-1,1
jx=ix+ix2
IF(jx.GE.0 .AND.jx.LE.nx) THEN
icol=numb(jx,jy)
mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix)
CALL putele(mat, irow, icol,mele)
END IF
END DO
END IF
END DO
END DO
END DO
!--------------------------------------------------------------------------------
CONTAINS
INTEGER FUNCTION numb(ix,iy)
INTEGER, INTENT(in) :: ix, iy
INTEGER :: stride
stride = grid%rank(1)
numb = iy*stride + (ix+1)
END FUNCTION numb
!--------------------------------------------------------------------------------
END SUBROUTINE fdmat_csr
!++
SUBROUTINE ibc_fdmat_gen(grid, prb)
!
! Generic version
!
TYPE(grid2d), INTENT(inout) :: grid
CHARACTER(len=*), INTENT(in) :: prb
!
IF(ALLOCATED(grid%mata)) THEN
CALL ibc_fdmat_csr(grid, grid%mata, prb)
ELSE
CALL ibc_fdmat_cds(grid, grid%mata_cds, prb)
END IF
!
END SUBROUTINE ibc_fdmat_gen
!++
SUBROUTINE ibc_fdmat_csr(grid, mat, prb)
!
! Impose BC
!
TYPE(grid2d), INTENT(in) :: grid
TYPE(csr_mat), INTENT(inout) :: mat
CHARACTER(len=*), INTENT(in) :: prb
!
DOUBLE PRECISION :: arow(mat%rank)
INTEGER :: nx, ny, nx1, ny1, n, iy, irow, irow1
!--------------------------------------------------------------------------------
nx = grid%n(1)
ny = grid%n(2)
nx1=nx+1
ny1=ny+1
n = nx1*ny1
!
! Dirichelt BC on West/East
!
IF(prb.EQ.'dddd') THEN
DO irow=1,ny*nx1+1,nx1
arow=0.0d0; arow(irow)=1.0d0
CALL putrow(mat, irow, arow)
irow1=irow+nx
arow=0.0d0; arow(irow1)=1.0d0
CALL putrow(mat, irow1, arow)
END DO
!
! Neumann on West/East
! WARNING: Divide the stencil by 2 => should do the same for RHS!
!
ELSE IF(prb.EQ.'nndd') THEN
DO irow=1,ny*nx1+1,nx1
iy = irow/nx1
CALL getrow(mat, irow, arow)
arow(irow+1) = 2.0d0*arow(irow+1)
IF(iy.GT.0) arow(irow-nx) = 0.0d0
IF(iy.LT.ny) arow(irow+nx+2) = 0.0d0
arow(:) = 0.5d0*arow(:)
CALL putrow(mat, irow, arow)
END DO
DO irow=nx1,n,nx1
iy = irow/nx1
CALL getrow(mat, irow, arow)
arow(irow-1) = 2.0d0*arow(irow-1)
IF(iy.GT.0) arow(irow-nx-2) = 0.0d0
IF(iy.LT.ny) arow(irow+nx) = 0.0d0
arow(:) = 0.5d0*arow(:)
CALL putrow(mat, irow, arow)
END DO
!
ELSE
WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!'
STOP
END IF
!
! Dirichlet BC on South/North sides
!
DO irow=1,nx1
arow=0.0d0; arow(irow)=1.0d0
CALL putrow(mat, irow, arow)
END DO
DO irow1=ny*nx1+1,n
arow=0.0d0; arow(irow1)=1.0d0
CALL putrow(mat, irow1, arow)
END DO
!--------------------------------------------------------------------------------
END SUBROUTINE ibc_fdmat_csr
!++
SUBROUTINE ibc_fdmat_cds(grid, mat, prb)
!
! Impose BC
!
TYPE(grid2d), INTENT(in) :: grid
TYPE(cds_mat), INTENT(inout) :: mat
CHARACTER(len=*), INTENT(in) :: prb
!
INTEGER :: nx, ny, iy, irow
INTEGER :: n, nx1, ny1
!--------------------------------------------------------------------------------
nx = grid%n(1)
ny = grid%n(2)
nx1=nx+1
ny1=ny+1
n = nx1*ny1
!
! 2 == 3 == 4
! | | |
! -1 == 0 == 1
! | | |
! -4 == -3 == -2
!
! Dirichelt BC on West/East
!
IF(prb.EQ.'dddd') THEN
DO irow=1,ny*nx1+1,nx1
mat%val(irow,:) = 0.0d0
mat%val(irow,0) = 1.0d0
mat%val(irow+nx,:) = 0.0d0
mat%val(irow+nx,0) = 1.0d0
END DO
!
! Neumann on West/East
! WARNING: Divide the stencil by 2 => should do the same for RHS!
!
ELSE IF(prb.EQ.'nndd') THEN
DO irow=1,ny*nx1+1,nx1
iy = irow/nx1
IF(iy.GT.0) mat%val(irow,-2)=0.0d0
IF(iy.LT.ny) mat%val(irow,+4)=0.0d0
mat%val(irow,+1)=2.0d0*mat%val(irow,+1)
mat%val(irow,:)=0.5d0*mat%val(irow,:)
END DO
DO irow=nx1,n,nx1
iy = irow/nx1
IF(iy.GT.0) mat%val(irow,-4)=0.0d0
IF(iy.LT.ny) mat%val(irow,+2)=0.0d0
mat%val(irow,-1)=2.0d0*mat%val(irow,-1)
mat%val(irow,:)=0.5d0*mat%val(irow,:)
END DO
!
ELSE
WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!'
STOP
END IF
!
! Dirichlet BC on South/North sides
!
DO irow=1,nx1
mat%val(irow,:) = 0.0d0
mat%val(irow,0) = 1.0d0
END DO
DO irow=ny*nx1+1,n
mat%val(irow,:) = 0.0d0
mat%val(irow,0) = 1.0d0
END DO
!--------------------------------------------------------------------------------
END SUBROUTINE ibc_fdmat_cds
!++
END MODULE fdmat_mod
diff --git a/multigrid/src/gvector_mod.f90 b/multigrid/src/gvector_mod.f90
index 2f0162e..5fddf67 100644
--- a/multigrid/src/gvector_mod.f90
+++ b/multigrid/src/gvector_mod.f90
@@ -1,231 +1,231 @@
!>
!> @file gvector_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE gvector
!
! Implementation of 2D vectors with arbitrary
! vector bounds and ghost cell width.
!
! T.M. Tran, CRPP-EPFL
! September 2013
!
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
PRIVATE
PUBLIC :: gvector_2d, disp, norm2, &
& OPERATOR(+), OPERATOR(-), OPERATOR(*), &
& ASSIGNMENT(=)
TYPE gvector_2d
INTEGER, DIMENSION(2) :: s, e ! vector internal bounds
INTEGER, DIMENSION(2) :: g ! ghost cell widths
REAL(rkind), ALLOCATABLE :: val(:,:)
END TYPE gvector_2d
INTERFACE gvector_2d
MODULE PROCEDURE constructor
END INTERFACE gvector_2d
INTERFACE OPERATOR(+)
MODULE PROCEDURE add_scal
MODULE PROCEDURE add_vec
END INTERFACE OPERATOR(+)
INTERFACE OPERATOR(-)
MODULE PROCEDURE minus_vec
MODULE PROCEDURE substract_vec
END INTERFACE OPERATOR(-)
INTERFACE OPERATOR(*)
MODULE PROCEDURE scale_left
MODULE PROCEDURE scale_right
END INTERFACE OPERATOR(*)
INTERFACE ASSIGNMENT(=)
MODULE PROCEDURE from_scal
MODULE PROCEDURE from_vec
END INTERFACE ASSIGNMENT(=)
INTERFACE norm2
MODULE PROCEDURE norm2_gvector_2d
MODULE PROCEDURE norm2_root_g_2d
MODULE PROCEDURE norm2_all_g_2d
END INTERFACE norm2
CONTAINS
!=======================================================================
FUNCTION constructor(s, e, g) RESULT(res)
INTEGER, INTENT(in) :: s(2), e(2)
INTEGER, OPTIONAL, INTENT(in) :: g(2)
TYPE(gvector_2d) :: res
INTEGER :: lb(2), ub(2)
res%g= 0
IF(PRESENT(g)) res%g = g
res%s = s
res%e = e
lb = res%s - res%g
ub = res%e + res%g
ALLOCATE(res%val(lb(1):ub(1),lb(2):ub(2)))
!
! Initialize to 0 on all ghost cells
res%val(lb(1):s(1)-1,:) = 0._rkind
res%val(e(1)+1:ub(1),:) = 0._rkind
res%val(:,lb(2):s(2)-1) = 0._rkind
res%val(:,e(2)+1:ub(2)) = 0._rkind
END FUNCTION constructor
!=======================================================================
FUNCTION add_vec(lhs, rhs) RESULT(res)
TYPE(gvector_2d), INTENT(in) :: lhs, rhs
TYPE(gvector_2d) :: res
res = gvector_2d(lhs%s, lhs%e, lhs%g)
res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
& lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + &
& rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
END FUNCTION add_vec
!=======================================================================
FUNCTION add_scal(lhs, rhs) RESULT(res)
TYPE(gvector_2d), INTENT(in) :: lhs
REAL(rkind), INTENT(in) :: rhs
TYPE(gvector_2d) :: res
res = gvector_2d(lhs%s, lhs%e, lhs%g)
res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
& lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + rhs
END FUNCTION add_scal
!=======================================================================
FUNCTION minus_vec(this) RESULT(res)
TYPE(gvector_2d), INTENT(in) :: this
TYPE(gvector_2d) :: res
res = gvector_2d(this%s, this%e, this%g)
res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
& -this%val(res%s(1):res%e(1),res%s(2):res%e(2))
END FUNCTION minus_vec
!=======================================================================
FUNCTION substract_vec(lhs, rhs) RESULT(res)
TYPE(gvector_2d), INTENT(in) :: lhs, rhs
TYPE(gvector_2d) :: res
res = gvector_2d(lhs%s, lhs%e, lhs%g)
res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
& lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) - &
& rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
END FUNCTION substract_vec
!=======================================================================
FUNCTION scale_left(lhs, rhs) RESULT(res)
REAL(rkind), INTENT(in) :: lhs
TYPE(gvector_2d), INTENT(in) :: rhs
TYPE(gvector_2d) :: res
res = gvector_2d(rhs%s, rhs%e, rhs%g)
res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
& lhs * rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
END FUNCTION scale_left
!=======================================================================
FUNCTION scale_right(lhs, rhs) RESULT(res)
TYPE(gvector_2d), INTENT(in) :: lhs
REAL(rkind), INTENT(in) :: rhs
TYPE(gvector_2d) :: res
res = gvector_2d(lhs%s, lhs%e, lhs%g)
res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
& lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) * rhs
END FUNCTION scale_right
!=======================================================================
SUBROUTINE from_vec(lhs, rhs)
TYPE(gvector_2d), INTENT(inout) :: lhs
REAL(rkind), INTENT(in) :: rhs(:,:)
INTEGER :: n(2)
n = lhs%e - lhs%s + 1
IF(SIZE(rhs,1).NE.n(1) .OR. SIZE(rhs,2).NE.n(2)) THEN
PRINT*, 'from_vec: sizes of rhs and lhs not equal!'
STOP
END IF
lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs(:,:)
END SUBROUTINE from_vec
!=======================================================================
SUBROUTINE from_scal(lhs, rhs)
TYPE(gvector_2d), INTENT(inout) :: lhs
REAL(rkind), INTENT(in) :: rhs
lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs
END SUBROUTINE from_scal
!=======================================================================
SUBROUTINE disp(str,this)
CHARACTER(len=*), INTENT(in) :: str
TYPE(gvector_2d), INTENT(in) :: this
INTEGER :: i
WRITE(*,'(/a,3(" (",i0,",",i0,") "))') str//': s, e, g =',&
& this%s, this%e, this%g
DO i=LBOUND(this%val,1),UBOUND(this%val,1)
WRITE(*,'(10(1pe11.3))') (this%val(i,:))
END DO
END SUBROUTINE disp
!=======================================================================
FUNCTION norm2_gvector_2d(this) RESULT(res)
TYPE(gvector_2d), INTENT(in) :: this
REAL(rkind) :: res
res = NORM2( this%val(this%s(1):this%e(1), &
& this%s(2):this%e(2)) )
END FUNCTION norm2_gvector_2d
!=======================================================================
FUNCTION norm2_root_g_2d(x, comm, root) RESULT(res)
!
! Vector norm of 2d distributed array with ghost cells
!
USE mpi
TYPE(gvector_2d), INTENT(in) :: x
INTEGER, INTENT(in) :: comm
INTEGER, INTENT(in) :: root
REAL(rkind) :: res
INTEGER, PARAMETER :: ndim=2
INTEGER, DIMENSION(ndim) :: s, e
REAL(rkind) :: res_loc
INTEGER :: me, ierr
!
CALL mpi_comm_rank(comm, me, ierr)
s = x%s
e = x%e
res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2)
res = 0.0
CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM,&
& root, comm, ierr)
IF(me.EQ.root) res = SQRT(res)
END FUNCTION norm2_root_g_2d
!=======================================================================
FUNCTION norm2_all_g_2d(x, comm) RESULT(res)
!
! Vector norm of 2d distributed array with ghost cells
!
USE mpi
TYPE(gvector_2d), INTENT(in) :: x
INTEGER, INTENT(in) :: comm
REAL(rkind) :: res
INTEGER, PARAMETER :: ndim=2
INTEGER, DIMENSION(ndim) :: s, e
REAL(rkind) :: res_loc
INTEGER :: ierr
!
s = x%s
e = x%e
res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2)
CALL mpi_allreduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
& comm, ierr)
res = SQRT(res)
END FUNCTION norm2_all_g_2d
!=======================================================================
END MODULE gvector
diff --git a/multigrid/src/parmg_mod.f90 b/multigrid/src/parmg_mod.f90
index 5de78ca..25ebc52 100644
--- a/multigrid/src/parmg_mod.f90
+++ b/multigrid/src/parmg_mod.f90
@@ -1,722 +1,722 @@
!>
!> @file parmg_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE parmg
!
! parmg: Utilities for parallel multigrid
!
! T.M. Tran, CRPP-EPFL
! December 2013
!
USE mpi
USE iso_fortran_env, ONLY : rkind => real64
USE gvector, ONLY : gvector_2d
USE stencil, ONLY : stencil_2d
IMPLICIT NONE
!
PRIVATE
PUBLIC :: grid2_type, mg_info, create_grid, mg, get_lmax, coarse, disp, exchange, &
& get_resids, jacobi, prolong, init_restrict, restrict, &
& norm_vec, norm_mat
!
TYPE grid2_type
INTEGER, DIMENSION(2) :: s, e, s0, e0, npt_loc, npt
REAL(rkind), ALLOCATABLE :: x(:)
REAL(rkind), ALLOCATABLE :: y(:)
TYPE(gvector_2d) :: f
TYPE(gvector_2d) :: v
TYPE(stencil_2d) :: fdmat
TYPE(stencil_2d) :: restrict_mat
END TYPE grid2_type
!
TYPE mg_info
INTEGER :: comm ! Communicator
INTEGER :: nu1 ! Relaxation down sweeps
INTEGER :: nu2 ! Relaxation up sweeps
INTEGER :: mu ! mu-cycle number
INTEGER :: nu0 ! Number of FMG cycles
INTEGER :: levels ! Number of mg levels
INTEGER :: direct_solve_nits ! Jacobit nits for direct_solve
CHARACTER(len=4) :: relax ! Type of relation
REAL(rkind) :: omega ! for weighted Jacobi relaxation
END TYPE mg_info
!
INTERFACE create_grid
MODULE PROCEDURE create_grid_2d
END INTERFACE create_grid
INTERFACE mg
MODULE PROCEDURE mg_2d
END INTERFACE mg
INTERFACE coarse
MODULE PROCEDURE coarse_1d, coarse_2d
END INTERFACE coarse
INTERFACE exchange
MODULE PROCEDURE exchange_g_2d, exchange_g_2d_new
END INTERFACE exchange
INTERFACE prolong
MODULE PROCEDURE prolong_g_2d
END INTERFACE prolong
INTERFACE restrict
MODULE PROCEDURE restrict_g_2d
END INTERFACE restrict
INTERFACE jacobi
MODULE PROCEDURE jacobi_stencila_2d
MODULE PROCEDURE jacobi_stencilg_2d
END INTERFACE jacobi
INTERFACE get_resids
MODULE PROCEDURE resids_stencila_2d
MODULE PROCEDURE resids_stencilg_2d
END INTERFACE get_resids
INTERFACE disp
MODULE PROCEDURE dispi_0, dispi_1
END INTERFACE disp
CONTAINS
!
!--------------------------------------------------------------------------------
SUBROUTINE create_grid_2d(x, y, s_in, e_in, id, prb, grids, comm)
!
! Create arrays of partitionned grids
!
USE stencil, ONLY : init
!
REAL(rkind), INTENT(in) :: x(0:), y(0:) ! Global coordinates
INTEGER, INTENT(in) :: s_in(2), e_in(2) ! Partition of finest grid
INTEGER, INTENT(in) :: id(:,:) ! Structure of stencil
CHARACTER(len=*), INTENT(in) :: prb
INTEGER, INTENT(in) :: comm
TYPE(grid2_type) :: grids(:)
!
INTEGER :: levels
INTEGER :: s0(2), e0(2), s(2), e(2)
INTEGER :: npt_loc(2), npt_loc_min(2), npt_glob(2)
INTEGER :: l, ierr
!
levels = SIZE(grids)
s = s_in
e = e_in
!
DO l=1,levels
IF(l.GT.1) THEN
CALL coarse(s,e)
END IF
npt_loc = e-s+1
CALL mpi_allreduce(s, s0, 2, MPI_INTEGER, MPI_MIN, comm, ierr)
CALL mpi_allreduce(e, e0, 2, MPI_INTEGER, MPI_MAX, comm, ierr)
CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, comm, ierr)
IF(MINVAL(npt_loc_min) .LT. 2) THEN
PRINT*, 'CREATE_GRID: number intervals too small!'
STOP
END IF
npt_glob = e0+1
grids(l)%s0 = s0
grids(l)%e0 = e0
grids(l)%s = s
grids(l)%e = e
grids(l)%npt_loc = npt_loc
grids(l)%npt = npt_glob
grids(l)%f = gvector_2d(s, e, [1,1]) ! Arrays with ghost cell
grids(l)%v = gvector_2d(s, e, [1,1])
ALLOCATE(grids(l)%x(s0(1):e0(1))) ! Global coords (x,y)
ALLOCATE(grids(l)%y(s0(2):e0(2)))
IF(l.EQ.1) THEN
grids(1)%x = x
grids(1)%y = y
ELSE
grids(l)%x(:) = grids(l-1)%x(0::2)
grids(l)%y(:) = grids(l-1)%y(0::2)
END IF
END DO
!
! Set up FD matrix
!
DO l=1,levels
s = grids(l)%s
e = grids(l)%e
CALL init(s, e, id, .FALSE., grids(l)%fdmat, comm)
END DO
!
! Set up restriction stencil
!
DO l=2,levels
CALL init_restrict(grids(l), prb, comm)
END DO
!
END SUBROUTINE create_grid_2d
!--------------------------------------------------------------------------------
RECURSIVE SUBROUTINE mg_2d(grids, info, l)
!
! Execute a recursive V-cycle
!
USE gvector, ONLY : ASSIGNMENT(=), OPERATOR(+)
TYPE(grid2_type), INTENT(inout) :: grids(:)
TYPE(mg_info), INTENT(in) :: info
INTEGER, INTENT(in) :: l
!
TYPE(gvector_2d) :: resids, v_prolong
INTEGER, DIMENSION(2) :: s0, e0, s, e, g=[1,1]
INTEGER :: comm, levels, k
!
comm = info%comm
levels = info%levels
!
s0 = grids(l)%s0; e0 = grids(l)%e0
s = grids(l)%s; e = grids(l)%e
resids = gvector_2d(s, e, g)
!
IF(l.EQ.levels) THEN
CALL direct_solve(grids(l)%fdmat, grids(l)%v, grids(l)%f)
ELSE
CALL relax(info%nu1)
resids = get_resids(comm, grids(l)%fdmat, grids(l)%v, grids(l)%f)
CALL exchange(comm, resids)
CALL restrict(grids(l+1)%restrict_mat, resids, grids(l+1)%f)
grids(l+1)%v = 0.0d0
!
! Only 1 call to the coarsest level
DO k=1,MERGE(info%mu, 1, (l+1).NE.levels)
CALL mg(grids, info, l+1)
END DO
!
v_prolong = gvector_2d(s, e, g)
CALL exchange(comm, grids(l+1)%v)
CALL prolong(grids(l+1)%v, v_prolong)
grids(l)%v = grids(l)%v + v_prolong
CALL relax(info%nu2)
END IF
!
CONTAINS
SUBROUTINE relax(nu)
INTEGER, INTENT(in) :: nu
SELECT CASE (TRIM(info%relax))
CASE ("jac")
CALL jacobi(comm, grids(l)%fdmat, info%omega, nu, grids(l)%v, grids(l)%f)
CASE default
PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
STOP
END SELECT
END SUBROUTINE relax
SUBROUTINE direct_solve(mat, v, f)
TYPE(stencil_2d), INTENT(in) :: mat
TYPE(gvector_2d), INTENT(inout) :: v
TYPE(gvector_2d), INTENT(in) :: f
v = 0.0d0
CALL jacobi(comm, mat, 1.0_rkind, info%direct_solve_nits, v, f)
END SUBROUTINE direct_solve
END SUBROUTINE mg_2d
!--------------------------------------------------------------------------------
FUNCTION get_lmax(s_in, npt_loc, npt_min, comm) RESULT(lmax)
!
! Get max number of levels on all processes
!
INTEGER :: lmax
INTEGER, INTENT(in) :: s_in, npt_loc, npt_min, comm
INTEGER :: me, ierr
INTEGER :: s, e, kpt_loc, kpt, kpt_loc_min
!
CALL mpi_comm_rank(comm, me, ierr)
s = s_in
kpt_loc = npt_loc
e = s+npt_loc-1
lmax = 1
DO
CALL mpi_allreduce(kpt_loc, kpt_loc_min, 1, MPI_INTEGER, MPI_MIN, &
& comm, ierr)
CALL mpi_allreduce(kpt_loc, kpt, 1, MPI_INTEGER, MPI_SUM, &
& comm, ierr)
!
! Stop if npt-1 not even or when minumum local npt is attained
IF(MODULO(kpt-1,2).NE.0 .OR. kpt_loc_min .LE. npt_min) EXIT
!
lmax = lmax+1
CALL coarse(s, e)
kpt_loc = e-s+1
END DO
!
END FUNCTION get_lmax
!--------------------------------------------------------------------------------
SUBROUTINE coarse_1d(s, e)
!
! Compute (s,e) of next coarse grid
!
INTEGER, INTENT(inout) :: s, e
INTEGER :: s0, npt, i
!
! Previous odd indices are discarded
s0 = s
IF( MODULO(s0,2) .NE. 0 ) THEN
s0 = s+1
END IF
!
! Count local number of points
npt = 0
DO i=s0,e,2
npt = npt+1
END DO
!
! Coarse s, e
s = s0/2
e = s + npt - 1
END SUBROUTINE coarse_1d
!--------------------------------------------------------------------------------
SUBROUTINE coarse_2d(s, e)
!
! Compute (s,e) of next coarse grid
!
INTEGER, INTENT(inout) :: s(2), e(2)
!
CALL coarse_1d(s(1), e(1))
CALL coarse_1d(s(2), e(2))
END SUBROUTINE coarse_2d
!--------------------------------------------------------------------------------
SUBROUTINE dispi_0(str, a, comm)
!
! Display integer local scalar
!
INTEGER, INTENT(in) :: a, comm
CHARACTER(len=*), INTENT(in) :: str
INTEGER :: npes, me, ierr
INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:)
!
CALL MPI_COMM_RANK(comm, me, ierr)
CALL MPI_COMM_SIZE(comm, npes, ierr)
ALLOCATE(a_gather(npes))
CALL MPI_GATHER(a, 1, MPI_INTEGER, a_gather, 1, MPI_INTEGER, &
& 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a/(20i6))') str, a_gather
END IF
DEALLOCATE(a_gather)
END SUBROUTINE dispi_0
!--------------------------------------------------------------------------------
SUBROUTINE dispi_1(str, a, comm)
!
! Display integer local array
!
INTEGER, INTENT(in) :: a(:), comm
CHARACTER(len=*), INTENT(in) :: str
INTEGER :: npes, me, ierr, n, i
INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:,:)
!
n = SIZE(a,1)
CALL MPI_COMM_RANK(comm, me, ierr)
CALL MPI_COMM_SIZE(comm, npes, ierr)
ALLOCATE(a_gather(n,npes))
CALL MPI_GATHER(a, n, MPI_INTEGER, a_gather, n, MPI_INTEGER, &
& 0, comm, ierr)
IF( me.EQ.0 ) THEN
WRITE(*,'(a)') str
DO i=1,n
WRITE(*,'(20i6)') a_gather(i,:)
END DO
END IF
DEALLOCATE(a_gather)
END SUBROUTINE dispi_1
!--------------------------------------------------------------------------------
SUBROUTINE exchange_g_2d_new(comm, u)
!
! Exchange ghost cells with (west,east,south,north) neighbors.
! Assume same ghost cells on each dimension:
! u%g(1) : number of ghost cells on west and east boundaries
! u%g(2) : number of ghost cells on south and north boundaries
!
INTEGER, INTENT(in) :: comm
TYPE(gvector_2d), INTENT(inout) :: u
INTEGER :: neighs(4), ierr
!
CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr)
CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr)
CALL exchange_g_2d(comm, neighs, u)
END SUBROUTINE exchange_g_2d_new
!--------------------------------------------------------------------------------
SUBROUTINE exchange_g_2d(comm, neighs, u)
!
! Exchange ghost cells with (west,east,south,north) neighbors.
! Assume same ghost cells on each dimension:
! u%g(1) : number of ghost cells on west and east boundaries
! u%g(2) : number of ghost cells on south and north boundaries
!
INTEGER, INTENT(in) :: comm
INTEGER, INTENT(in) :: neighs(4)
TYPE(gvector_2d), INTENT(inout) :: u
!
INTEGER :: cols, rows
INTEGER :: ierr
INTEGER, PARAMETER :: ndim=2
INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n
!
s = u%s
e = u%e
g = u%g
lb = s - g
ub = e + g
n = ub - lb + 1 ! include ghost cells
!
! g(2) matrix full rows with stride n(1)
CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr)
CALL mpi_type_commit(rows, ierr)
!
! g(1) contiguous matrix full columns
CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION, cols, ierr)
CALL mpi_type_commit(cols, ierr)
!
! Exchange along first dimension
CALL mpi_sendrecv(u%val(s(1), lb(2)), 1, rows, neighs(1), 0, &
& u%val(e(1)+1,lb(2)), 1, rows, neighs(2), 0, &
& comm, MPI_STATUS_IGNORE, ierr)
CALL mpi_sendrecv(u%val(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, &
& u%val(lb(1), lb(2)), 1, rows, neighs(1), 0, &
& comm, MPI_STATUS_IGNORE, ierr)
!
! Exchange along second dimension
CALL mpi_sendrecv(u%val(lb(1),s(2)), 1, cols, neighs(3), 0, &
& u%val(lb(1),e(2)+1), 1, cols, neighs(4), 0, &
& comm, MPI_STATUS_IGNORE, ierr)
CALL mpi_sendrecv(u%val(lb(1),e(2)-g(2)+1), 1, cols, neighs(4), 0, &
& u%val(lb(1),lb(2)), 1, cols, neighs(3), 0, &
& comm, MPI_STATUS_IGNORE, ierr)
END SUBROUTINE exchange_g_2d
!--------------------------------------------------------------------------------
SUBROUTINE prolong_g_2d(vbar, v)
!
! 2D bilinear prolongation
!
TYPE(gvector_2d), INTENT(in) :: vbar
TYPE(gvector_2d), INTENT(inout) :: v
!
INTEGER :: i,j,i1,i2,j1,j2
!
i1 = v%s(1)-MODULO(v%s(1),2); i2 = v%e(1)+MODULO(v%e(1),2)
j1 = v%s(2)-MODULO(v%s(2),2); j2 = v%e(2)+MODULO(v%e(2),2)
!
! Even numbered nodes on fine mesh
!
DO j=j1,j2,2
DO i=i1,i2,2
v%val(i,j) = vbar%val(i/2,j/2)
END DO
END DO
!
! Linear interpolation on x
!
DO j=j1,j2,2
DO i=i1+1,i2-1,2
v%val(i,j) = 0.5d0*(v%val(i-1,j)+v%val(i+1,j))
END DO
END DO
!
! Linear interpolation on y
!
DO j=j1+1,j2-1,2
DO i=i1,i2
v%val(i,j) = 0.5d0*(v%val(i,j-1)+v%val(i,j+1))
END DO
END DO
END SUBROUTINE prolong_g_2d
!--------------------------------------------------------------------------------
SUBROUTINE init_restrict(grid, prb, comm)
!
! Set up restriction stencil
!
USE stencil, ONLY : init
TYPE(grid2_type), INTENT(inout) :: grid
CHARACTER(len=*), INTENT(in) :: prb
INTEGER, INTENT(in) :: comm
!
INTEGER, PARAMETER :: npoints=9, ndim=2
INTEGER :: s(2), e(2), n(2), id(9,2)
INTEGER :: i, j
!
! Stencil structure initialization
!
s = grid%s
e = grid%e
n = grid%npt-1
!
! N
! 6---7---8
! | | |
! W 4---0---5 E Numbering of stencil
! | | |
! 1---2---3
! S
!
id = RESHAPE([0, -1, 0, 1,-1, 1,-1, 0, 1, &
& 0, -1,-1,-1, 0, 0, 1, 1, 1], &
& [npoints, ndim])
CALL init(s, e, id, .FALSE., grid%restrict_mat, comm)
!
! Fill in stencil
!
DO j=s(2),e(2)
DO i=s(1),e(1)
grid%restrict_mat%val(i,j,:) = [4._rkind, &
& 1._rkind, 2._rkind, 1._rkind, &
& 2._rkind, 2._rkind, &
& 1._rkind, 2._rkind, 1._rkind ]&
& / 16._rkind
END DO
END DO
!
! Apply Dirichlet BC
!
IF(s(1).EQ.0 .AND. prb(1:1).EQ.'d') THEN ! West face
grid%restrict_mat%val(s(1),:,3) = 0._rkind
grid%restrict_mat%val(s(1),:,5) = 0._rkind
grid%restrict_mat%val(s(1),:,8) = 0._rkind
END IF
IF(e(1).EQ.n(1) .AND. prb(2:2).EQ.'d') THEN ! East face
grid%restrict_mat%val(e(1),:,1) = 0._rkind
grid%restrict_mat%val(e(1),:,4) = 0._rkind
grid%restrict_mat%val(e(1),:,6) = 0._rkind
END IF
IF(s(2).EQ.0 .AND. prb(3:3).EQ.'d') THEN ! South face
grid%restrict_mat%val(:,s(2),6) = 0._rkind
grid%restrict_mat%val(:,s(2),7) = 0._rkind
grid%restrict_mat%val(:,s(2),8) = 0._rkind
END IF
IF(e(2).EQ.n(2) .AND. prb(4:4).EQ.'d') THEN ! North face
grid%restrict_mat%val(:,e(2),1) = 0._rkind
grid%restrict_mat%val(:,e(2),2) = 0._rkind
grid%restrict_mat%val(:,e(2),3) = 0._rkind
END IF
END SUBROUTINE init_restrict
!--------------------------------------------------------------------------------
SUBROUTINE jacobi_stencila_2d(mat, omega, nu, v, f)
!
! Weighted Jacobi relaxation
!
TYPE(stencil_2d),INTENT(in) :: mat
REAL(rkind), INTENT(in) :: omega
INTEGER, INTENT(in) :: nu
REAL(rkind), ALLOCATABLE, INTENT(inout) :: v(:,:)
REAL(rkind), ALLOCATABLE, INTENT(in) :: f(:,:)
!
REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:)
INTEGER, DIMENSION(2) :: smin, emax, s, e, d, lb, ub
INTEGER :: it, k, i, j
!
s(:) = mat%s(:)
e(:) = mat%e(:)
smin(:) = mat%s0(:)
emax(:) = mat%e0(:)
!
ALLOCATE(temp(s(1):e(1),s(2):e(2)))
ALLOCATE(inv_diag(s(1):e(1),s(2):e(2)))
!
inv_diag(:,:) = omega/mat%val(:,:,0)
DO it=1,nu
temp(:,:) = f(s(1):e(1),s(2):e(2))
DO k=1,mat%npoints-1 ! exclude the diagonal term, f - (L+U)*v
d(:) = mat%id(k,:)
lb = MAX(smin, smin-d, mat%s)
ub = MIN(emax, emax-d, mat%e)
DO j=lb(2),ub(2)
DO i=lb(1),ub(1)
temp(i,j) = temp(i,j) - mat%val(i,j,k)*v(i+d(1),j+d(2))
END DO
END DO
END DO
temp = temp * inv_diag
v(s(1):e(1),s(2):e(2)) = (1.d0-omega)*v(s(1):e(1),s(2):e(2)) + temp
END DO
!
DEALLOCATE(temp)
DEALLOCATE(inv_diag)
END SUBROUTINE jacobi_stencila_2d
!--------------------------------------------------------------------------------
SUBROUTINE jacobi_stencilg_2d(comm, mat, omega, nu, v, f)
!
! Weighted Jacobi relaxation
!
USE gvector, ONLY : ASSIGNMENT(=)
INTEGER, INTENT(in) :: comm
TYPE(stencil_2d),INTENT(in) :: mat
REAL(rkind), INTENT(in) :: omega
INTEGER, INTENT(in) :: nu
TYPE(gvector_2d), INTENT(inout) :: v
TYPE(gvector_2d), INTENT(in) :: f
!
REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:)
INTEGER, DIMENSION(2) :: s, e, d
INTEGER :: it, k, i, j
!
s(:) = v%s(:)
e(:) = v%e(:)
!
ALLOCATE(temp(s(1):e(1),s(2):e(2)))
ALLOCATE(inv_diag(s(1):e(1),s(2):e(2)))
!
inv_diag(:,:) = omega/mat%val(:,:,0)
DO it=1,nu
CALL exchange(comm, v)
temp(:,:) = f%val(s(1):e(1),s(2):e(2))
DO k=1,mat%npoints-1 ! exclude the diagonal term, f - (L+U)*v
d(:) = mat%id(k,:)
DO j=s(2),e(2)
DO i=s(1),e(1)
temp(i,j) = temp(i,j) - &
& mat%val(i,j,k) * v%val(i+d(1),j+d(2))
END DO
END DO
END DO
temp = temp * inv_diag
v%val(s(1):e(1),s(2):e(2)) = (1.d0-omega) * v%val(s(1):e(1),s(2):e(2)) + &
& temp
END DO
END SUBROUTINE jacobi_stencilg_2d
!--------------------------------------------------------------------------------
FUNCTION resids_stencila_2d(mat, xarr, farr) RESULT(res)
!
! Return residuals res = mat*x, where x, farr and res are simple arrays
!
TYPE(stencil_2d), INTENT(in) :: mat
REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:)
REAL(rkind), ALLOCATABLE, INTENT(in) :: farr(:,:)
REAL(rkind) :: res(LBOUND(xarr,1):UBOUND(xarr,1), &
& LBOUND(xarr,2):UBOUND(xarr,2))
INTEGER :: k, i, j
INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub
!
smin(:) = mat%s0(:)
emax(:) = mat%e0(:)
res = farr
DO k=0,mat%npoints-1
d(:) = mat%id(k,:)
lb = MAX(smin, smin-d, mat%s)
ub = MIN(emax, emax-d, mat%e)
DO j=lb(2),ub(2)
DO i=lb(1),ub(1)
res(i,j) = res(i,j) - mat%val(i,j,k)*xarr(i+d(1),j+d(2))
END DO
END DO
END DO
END FUNCTION resids_stencila_2d
!--------------------------------------------------------------------------------
FUNCTION resids_stencilg_2d(comm, mat, xarr, farr) RESULT(res)
!
! Return residuals res= f-mat*x, where x, f and res are gvectors
!
INTEGER, INTENT(in) :: comm
TYPE(stencil_2d), INTENT(in) :: mat
TYPE(gvector_2d), INTENT(inout) :: xarr
TYPE(gvector_2d), INTENT(in) :: farr
TYPE(gvector_2d) :: res
INTEGER :: k, i, j
INTEGER, DIMENSION(2) :: s, e, d
!
s = xarr%s
e = xarr%e
res = gvector_2d(xarr%s, xarr%e, xarr%g)
res%val = farr%val
CALL exchange(comm, xarr)
DO k=0,mat%npoints-1
d(:) = mat%id(k,:)
DO j=s(2),e(2)
DO i=s(1),e(1)
res%val(i,j) = res%val(i,j) - mat%val(i,j,k)*xarr%val(i+d(1),j+d(2))
END DO
END DO
END DO
END FUNCTION resids_stencilg_2d
!--------------------------------------------------------------------------------
SUBROUTINE restrict_g_2d(mat, f, fbar)
!
! 2D full weighting restriction
!
TYPE(stencil_2d), INTENT(in) :: mat
TYPE(gvector_2d), INTENT(in) :: f
TYPE(gvector_2d), INTENT(inout) :: fbar
!
INTEGER, DIMENSION(2) :: s, e, d
INTEGER :: k, i, j
!
s = fbar%s
e = fbar%e
!
! Diagonal contributions: d(0) = (0,0)
DO j=s(2),e(2)
DO i=s(1),e(1)
fbar%val(i,j) = mat%val(i,j,0) * f%val(2*i,2*j)
END DO
END DO
!
DO k=1,mat%npoints-1
d(:) = mat%id(k,:)
DO j=s(2),e(2)
DO i=s(1),e(1)
fbar%val(i,j) = fbar%val(i,j) + &
& mat%val(i,j,k) * f%val(2*i+d(1),2*j+d(2))
END DO
END DO
END DO
END SUBROUTINE restrict_g_2d
!--------------------------------------------------------------------------------
REAL(rkind) FUNCTION norm_vec(x, comm, root)
!
! Infinity vector norm
!
TYPE(gvector_2d), INTENT(in) :: x
INTEGER, INTENT(in) :: comm
INTEGER, OPTIONAL, intent(in) :: root
REAL(rkind) :: temp
INTEGER :: ierr
temp = MAXVAL( ABS(x%val(x%s(1):x%e(1),x%s(2):x%e(2))) )
IF(PRESENT(root)) THEN
CALL mpi_reduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
& root, comm, ierr)
ELSE
CALL mpi_allreduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
& comm, ierr)
END IF
END FUNCTION norm_vec
!--------------------------------------------------------------------------------
REAL(rkind) FUNCTION norm_mat(mat, comm, root)
!
! Infinity matrix norm
!
TYPE(stencil_2d), INTENT(in) :: mat
INTEGER, INTENT(in) :: comm
INTEGER, OPTIONAL, intent(in) :: root
REAL(rkind) :: arr_temp(mat%s(1):mat%e(1),mat%s(2):mat%e(2))
REAL(rkind) :: temp
INTEGER :: i, j, s(2), e(2), ierr
s = mat%s; e = mat%e
DO j=s(2),e(2)
DO i=s(1),e(1)
arr_temp(i,j) = SUM(ABS(mat%val(i,j,:)))
END DO
END DO
temp = MAXVAL(arr_temp)
IF(PRESENT(root)) THEN
CALL mpi_reduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
& root, comm, ierr)
ELSE
CALL mpi_allreduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
& comm, ierr)
END IF
END FUNCTION norm_mat
!--------------------------------------------------------------------------------
END MODULE parmg
diff --git a/multigrid/src/partition.f90 b/multigrid/src/partition.f90
index 1a3b2e6..27cd714 100644
--- a/multigrid/src/partition.f90
+++ b/multigrid/src/partition.f90
@@ -1,77 +1,77 @@
!>
!> @file partition.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
USE mpi
USE pputils2, ONLY : dist1d
USE parmg, ONLY : get_lmax, coarse, disp
IMPLICIT NONE
!
INTEGER :: me, npes, ierr
INTEGER :: n, npt, npt_loc, s, e
INTEGER :: l, lmax
!
CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
!
IF(me.EQ.0) THEN
WRITE(*,*) 'Enter n'
READ(*,*) n
END IF
CALL mpi_bcast(n,1,MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
!
! Partition ot finest grid
!
npt = n+1
CALL dist1d(MPI_COMM_WORLD, 0, npt, s, npt_loc)
e = s+npt_loc-1
CALL disp('start index=', s, MPI_COMM_WORLD)
!
! Max number of levels
!
lmax = get_lmax(s, npt_loc, 1, MPI_COMM_WORLD)
IF(me.EQ.0) WRITE(*,'(a,i0)') 'Max number of levels ', lmax
!
! Grid coarsening
!
DO l=1,lmax
IF(l.GT.1) THEN
CALL coarse(s, e)
npt_loc = e-s+1
CALL mpi_allreduce(npt_loc, npt, 1, MPI_INTEGER, MPI_SUM, &
& MPI_COMM_WORLD, ierr)
END IF
IF(me.EQ.0) WRITE(*, '(a,i3)') 'level', l
CALL disp('s ', s, MPI_COMM_WORLD)
CALL disp('e ', e, MPI_COMM_WORLD)
CALL disp('npt_loc', npt_loc, MPI_COMM_WORLD)
CALL disp('npt ', npt, MPI_COMM_WORLD)
END DO
!
CALL MPI_FINALIZE(ierr)
!
!+++++
END PROGRAM main
diff --git a/multigrid/src/poisson_fd.f90 b/multigrid/src/poisson_fd.f90
index d44100d..3710133 100644
--- a/multigrid/src/poisson_fd.f90
+++ b/multigrid/src/poisson_fd.f90
@@ -1,485 +1,485 @@
!>
!> @file poisson_fd.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Solving the following 2d PDE using finite differences:
!
! d2/dx2 f(x,y) + tau*d2/dxdy f(x,y) + d2/dy2 f(x,y) = w(x,y), x,y in [0:Lx][0:Ly]
! w(x,y) = -4*pi^2 *[(kx^2/Lx^2+ky^2/Ly^2)*cos(2*kx*pi*x/Lx)*sin(2*ky*pi*y/Ly)
! -tau*(kx*ky)/(Lx*Ly)*sin(2*kx*pi*x/Lx)*cos(2*ky*pi*y/Lx)]
!
! West, East boundaries: Neumann
! South, North boundaries: Dirichlet
!
! Analytic solution : f(x,y) = cos(2*kx*pi*x/Lx)*sin(2*kx*pi*y/Ly)
!
USE multigrid
USE fdmat_mod
IMPLICIT NONE
INCLUDE 'mpif.h'
!
INTEGER, PARAMETER :: nnumx=32
!
INTEGER :: ierr, np, me
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
DOUBLE PRECISION :: Lx, Ly, icrosst, beta, miome
INTEGER :: n, nx,ny,nz,kx,ky
CHARACTER(len=4) :: prb
INTEGER :: nits
DOUBLE PRECISION :: atol, rtol
LOGICAL :: nldirect, nldebug
!
TYPE(mg_info) :: info ! info for MG
INTEGER :: levels, nnu, mu, nu0
!
INTEGER :: inu, nu1(nnumx), nu2(nnumx), niter(nnumx)
DOUBLE PRECISION :: titer(nnumx)
!
LOGICAL :: nlfixed
DOUBLE PRECISION :: omega
CHARACTER(len=4) :: mat_type, relax
!
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy, l, its
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), dense(:)
TYPE(grid2d), ALLOCATABLE :: grids(:)
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_ana2d(:,:), sol_direct2d(:,:)
DOUBLE PRECISION, POINTER :: sol_ana(:), sol_direct(:)
DOUBLE PRECISION :: err_direct, resid_direct
DOUBLE PRECISION :: norma, normb
DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
DOUBLE PRECISION :: t0, tsetup, tmat(2), tdirect, tbsolve
DOUBLE PRECISION, EXTERNAL :: mem
!
NAMELIST /parameters/ prb, mat_type,nx, ny, nz, kx, ky, Lx, Ly, icrosst, beta, &
& miome, nldebug, nlfixed, levels, nnu, nu1, nu2, mu, nu0, &
& relax,omega, nldirect, nits, atol, rtol
!--------------------------------------------------------------------------------
! 1.0 Prologue
!
CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,np,ierr)
!
! Default inputs
!
nx=32
ny=32
nz=1
kx=1
ky=1
icrosst=1.0d0
Lx = 1.0D0
Ly = 1.0D0
beta = 0d0
miome = 200d0
nldebug = .FALSE.
prb = 'dddd'
mat_type = 'cds'
nldirect = .TRUE.
!
nlfixed = .FALSE.
levels = 2
nnu = 1
nu1 = 1
nu2 = 1
mu = 1
nu0 = 1
nits = 10
atol = 1.e-8; rtol = 1.e-8
relax = 'jac'
omega = 0.6667
!
IF(me==0) THEN
READ(*,parameters)
WRITE(*,parameters)
END IF
!
! Send input parameters to other processors
!
CALL MPI_BCAST(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(nz, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(kx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(ky, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(icrosst, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(Lx,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(Ly,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(beta,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(miome, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
!
CALL mpi_bcast(nldebug, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nldirect, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nnu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu1, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu2, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mat_type, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(atol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
!
IF(nnu.GT.nnumx) THEN
IF(me.EQ.0) THEN
PRINT*, 'Value of nnu larger than', nnumx
END IF
CALL mpi_finalize(ierr)
STOP
END IF
!
! Adjust number of levels and fill mg info.
!
levels = MIN(levels, get_lmax(nx), get_lmax(ny))
info%nu1 = nu1(1)
info%nu2 = nu2(1)
info%mu = mu
info%nu0 = nu0
info%levels = levels
info%relax = relax
info%omega = omega
!--------------------------------------------------------------------------------
! 2.0 Setup grids
!
! Grid on the finest level
!
dx = lx/REAL(nx,8)
dy = ly/REAL(ny,8)
ALLOCATE(x(0:nx), y(0:ny))
DO ix=0,nx
x(ix) = ix*dx
END DO
DO iy=0,ny
y(iy) = iy*dy
END DO
WRITE(*,'(a,3(1pe12.3))') 'dx, dy, dx/dy =', dx, dy, dx/dy
!
ALLOCATE(dense(0:nx))
dense = fdense(x)
!
! Set up grids
!
t0 = mpi_wtime()
ALLOCATE(grids(levels))
CALL create_grid_fd(x, y, grids, info, mat_type=mat_type, debug=nldebug)
WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
IF(nldebug) THEN
CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1))
CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2))
END IF
!
! Set BC on grid transfer matrices
!
IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X
CALL ibc_transf(grids,2,3) ! Direction Y
tsetup = mpi_wtime()-t0
!--------------------------------------------------------------------------------
! 3.0 Problem discretization
!
! Construct FD matrix and impose BC on all grids
!
t0=mpi_wtime()
DO l=1,levels
CALL fdmat(grids(l), fdense, icrosst)
IF(mat_type.EQ.'csr') CALL to_mat(grids(l)%mata)
END DO
tmat(1) = mpi_wtime()-t0
!
t0=mpi_wtime()
DO l=1,levels
CALL ibc_fdmat(grids(l), prb)
END DO
tmat(2) = mpi_wtime()-t0
!
! Set RHS and impose BC on the fiest grid
!
grids(1)%f(:,:) = frhs(x,y)
!
IF(prb.EQ.'dddd') THEN
grids(1)%f(0,:) = 0.0d0 ! Dirichlet on west and east
grids(1)%f(nx,:) = 0.0d0
ELSE IF(prb.EQ.'nndd') THEN ! Neumann on west and east
grids(1)%f(0,:) = 0.5d0*grids(1)%f(0,:)
grids(1)%f(nx,:) = 0.5d0*grids(1)%f(nx,:)
END IF
grids(1)%f(:,0) = 0.0d0 ! Dirichlet on south and north
grids(1)%f(:,ny) = 0.0d0
!
!--------------------------------------------------------------------------------
! 4.0 Analytical solutions and RHS at the finest grid (l=1)
!
n = (nx+1)*(ny+1) ! Number of unknowns
ALLOCATE(sol_ana2d(0:nx,0:ny))
sol_ana(1:n) => sol_ana2d
sol_ana2d(:,:) = fsol(x,y)
!--------------------------------------------------------------------------------
! 5.0 Direct solution at the finest grid (l=1)
!
IF(nldirect) THEN
WRITE(*,'(/a)') 'Direct solution for the finest grid problem ...'
ALLOCATE(sol_direct2d(0:nx,0:ny))
sol_direct(1:n) => sol_direct2d
!
t0 = mpi_wtime()
sol_direct = grids(1)%f1d
CALL direct_solve(grids(1), sol_direct, debug=nldebug)
tdirect = mpi_wtime()-t0
!
t0 = mpi_wtime()
sol_direct = grids(1)%f1d
CALL direct_solve(grids(1), sol_direct, debug=nldebug)
tbsolve = mpi_wtime()-t0
!
! Max norm and residual
!
err_direct = MAXVAL(ABS(sol_direct-sol_ana))
resid_direct = residue(grids(1), grids(1)%f1d, sol_direct, 'inf')
WRITE(*,'(a,2(1pe12.3))') 'Max norm of error and residual norm', &
& err_direct, resid_direct
END IF
!--------------------------------------------------------------------------------
! 5.0 Iterative solution using MG V-cycle
!
WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
ALLOCATE(rresid(0:nits))
!
! Norm of A and b
!
IF(mat_type.EQ.'csr') THEN
norma = matnorm(grids(1)%mata, 'inf')
ELSE
norma = matnorm(grids(1)%mata_cds, 'inf')
END IF
normb = MAXVAL(ABS(grids(1)%f1d))
WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb
!
! Initial guess
!
DO inu=1,nnu
info%nu1 = nu1(inu)
info%nu2 = nu2(inu)
WRITE(*,'(/2(a5,i3,2x))') 'nu1 =', nu1(inu), 'nu2 =', nu2(inu)
IF(nlfixed .AND. nldirect) THEN
grids(1)%v = sol_direct2d
ELSE
grids(1)%v = 0.0d0
END IF
!
errdisc(0) = MAXVAL(ABS(grids(1)%v1d-sol_ana))
resid(0) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf')
rresid(0) = resid(0) / ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb )
WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
& 'rel. resid', 'ratio'
WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0)
!
! Iterations
!
t0 = mpi_wtime()
DO its=1,nits
CALL mg(grids, info, 1)
errdisc(its) = MAXVAL(ABS(grids(1)%v1d-sol_ana))
resid(its) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf')
rresid(its) = resid(its) / ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb )
WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') its, &
& resid(its), resid(its)/resid(its-1), &
& errdisc(its), errdisc(its)/errdisc(its-1), &
& rresid(its), rresid(its)/rresid(its-1)
IF(resid(its) .LE. atol .or. rresid(its) .le. rtol) EXIT
END DO
niter(inu) = MIN(nits,its)
titer(inu) = mpi_wtime() - t0
END DO
!--------------------------------------------------------------------------------
! 9.0 Epilogue
!
! Display timing
!
WRITE(*,'(/a)') 'Timing ...'
WRITE(*,'(a,1pe12.3,i5)') 'Setup time (s) ', tsetup
WRITE(*,'(a,2(1pe12.3))') 'Matrix construction time(s)', tmat
WRITE(*,'(a,2(1pe12.3))') 'Direct and bsolve time (s) ', tdirect, tbsolve
WRITE(*,'(/3a6,a15)') 'nu1', 'nu2', 'niter', 'Iter time(s)'
DO inu=1,nnu
WRITE(*,'(3i6,3x,1pe12.3)') nu1(inu), nu2(inu), niter(inu), titer(inu)
END DO
!
WRITE(*,'(/a,f12.3)') 'Mem used so far (MB)', mem()
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
! Clean up
!
DEALLOCATE(x)
DEALLOCATE(y)
DEALLOCATE(dense)
DEALLOCATE(grids)
DEALLOCATE(sol_ana2d)
IF(nldirect) DEALLOCATE(sol_direct2d)
DEALLOCATE(errdisc)
DEALLOCATE(resid)
DEALLOCATE(rresid)
!
CALL MPI_FINALIZE(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION fdense(x)
!
! Return density
!
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: fdense(SIZE(x))
fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
END FUNCTION fdense
!+++
FUNCTION frhs(x,y)
!
! Return RHS
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c, s, d(SIZE(x))
DOUBLE PRECISION :: corr
INTEGER :: j
corr = 1.d0+icrosst**2/4.0d0
d(:) = fdense(x(:))
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
& -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
& +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
END DO
END IF
!!$ frhs = -frhs
END FUNCTION frhs
!+++
FUNCTION fsol(x,y)
!
! Return analytical solution
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c
INTEGER :: j
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
END DO
END IF
END FUNCTION fsol
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='poisson_mg.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'KX', kx)
CALL attach(fid, '/', 'KY', ky)
CALL attach(fid, '/', 'LX', Lx)
CALL attach(fid, '/', 'LY', Ly)
CALL attach(fid, '/', 'BETA', beta)
CALL attach(fid, '/', 'OMEGA', omega)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'MAT_TYPE', mat_type)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'NNU', nnu)
CALL attach(fid, '/', 'NU0', nu0)
CALL attach(fid, '/', 'MU', mu)
!
CALL putarr(fid, '/nu1', nu1(1:nnu))
CALL putarr(fid, '/nu2', nu2(1:nnu))
CALL putarr(fid, '/dense', dense)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
IF(mat_type.EQ.'csr') THEN
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
ELSE
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata_cds)
END IF
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/calc', grids(1)%v)
CALL putarr(fid, '/solutions/anal', sol_ana2d)
IF(nldirect) CALL putarr(fid, '/solutions/direct', sol_direct2d)
!
nits=niter(nnu)
CALL creatg(fid, '/Iterations')
CALL putarr(fid, '/Iterations/residues', resid(0:nits))
CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM main
diff --git a/multigrid/src/ppoisson_fd.f90 b/multigrid/src/ppoisson_fd.f90
index 0fbf675..090a006 100644
--- a/multigrid/src/ppoisson_fd.f90
+++ b/multigrid/src/ppoisson_fd.f90
@@ -1,418 +1,418 @@
!>
!> @file ppoisson_fd.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Test 2D parallel multigrid V-cycle
!
MODULE mod
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
!
REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
CONTAINS
END MODULE mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PROGRAM main
USE mpi
USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs
USE pputils2, ONLY : dist1d, timera, hostlist
USE gvector, ONLY : gvector_2d, norm2, ASSIGNMENT(=), OPERATOR(-)
USE parmg, ONLY : grid2_type, mg_info, create_grid, mg, exchange, &
& get_resids, disp, norm_vec, norm_mat
USE stencil, ONLY : stencil_2d, putmat
USE mod
IMPLICIT NONE
!
INTEGER, PARAMETER :: ndims=2
!
INTEGER :: me, neighs(4), npes, ierr
INTEGER, DIMENSION(ndims) :: coords, comm1d
LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
LOGICAL :: reorder =.FALSE.
INTEGER :: comm_cart
INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc
!
REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:)
INTEGER, ALLOCATABLE :: id(:,:)
REAL(rkind) :: dx, dy
INTEGER :: npoints ! Number of points in FD stencil
REAL(rkind) :: t_mg, t_mg0, t_mg_min, t_mg_max
!
TYPE(gvector_2d) :: v_exact, resids, errs
REAL(rkind) :: norma, normb, normv
REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:), rresid(:)
REAL(rkind) :: ratio_err, ratio_resid, ratio_rresid
INTEGER, DIMENSION(ndims) :: g, npt_loc_min
INTEGER :: l, i, it
CHARACTER(len=64) :: str
!
TYPE(grid2_type), ALLOCATABLE :: grids(:)
TYPE(mg_info) :: info ! info for MG
!
! Input quantities
!
LOGICAL :: nldebug=.FALSE.
CHARACTER(len=64) :: filein = 'ppoisson_fd.in'
INTEGER :: dims(2)=[0,0]
CHARACTER(len=4) :: prb='dddd'
CHARACTER(len=4) :: relax='jac'
INTEGER :: nx=4, ny=4 ! Number of intervals
INTEGER :: kx=1, ky=1
REAL(rkind) :: Lx=1.0, Ly=1.0
REAL(rkind) :: icrosst=1.0, beta=0.0, miome=200.0
REAL(rkind) :: omega=1.0d0
INTEGER :: nits=100, direct_solve_nits=5
INTEGER :: levels=2, nu1=3, nu2=3, mu=1, nu0=1
REAL(rkind) :: rtol=1.e-8, atol=1.e-8, errtol=1.e-3
!
NAMELIST /in/ nldebug, dims, prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, &
& miome, omega, nits, levels, relax, nu1, nu2, mu, nu0, &
& direct_solve_nits, rtol, atol, errtol
!================================================================================
! 1.0 Prologue
!
! 2D process grid
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
!
! Read input filename from commmand line argument
!
IF( command_argument_count() > 0 ) THEN
CALL get_command_argument(1, filein)
END IF
IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein)
!
! Read problem inputs
!
OPEN(unit=99, file=filein, form='formatted')
READ(99,in)
CLOSE(99)
!
CALL mpi_dims_create(npes, ndims, dims, ierr)
CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
& ierr)
CALL mpi_comm_rank(comm_cart, me, ierr)
CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
!
CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
!
info%comm = comm_cart
info%nu1 = nu1
info%nu2 = nu2
info%mu = mu
info%nu0 = nu0
info%levels = levels
info%direct_solve_nits = direct_solve_nits
info%relax = relax
info%omega = omega
!
IF(me.EQ.0) THEN
WRITE(*, in)
END IF
IF(nldebug) THEN
CALL hostlist(comm_cart)
END IF
!================================================================================
! 2.0 2d Grid construction
!
! Partition 2D grid
!
CALL timera(0, 'Grid_construction')
npt_glob(1) = nx+1
npt_glob(2) = ny+1
DO i=1,ndims
CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
e(i) = s(i) + npt_loc(i) - 1
END DO
IF(nldebug) THEN
WRITE(*,'(a,i3.3,a,2(3i8,", "))') 'PE', me, ' coords, s,e:', &
& (coords(i),s(i),e(i),i=1,ndims)
END IF
!
! Global mesh
!
dx = Lx/REAL(nx)
dy = Ly/REAL(ny)
ALLOCATE(xgrid(0:nx))
ALLOCATE(ygrid(0:ny))
xgrid = [ (i*dx, i=0,nx) ]
ygrid = [ (i*dy, i=0,ny) ]
!
! Create grid structure
!
ALLOCATE(grids(levels))
npoints = 9 ! Size of FD stencil
ALLOCATE(id(npoints,2))
id=RESHAPE([ 0, -1, 0, 1, -1, 1, -1, 0, 1, &
0, -1, -1, -1, 0, 0, 1, 1, 1], &
[npoints,2])
CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart)
!
IF(nldebug) THEN
DO l=1,levels
WRITE(str,'(a,i0)') 'Number of local points at level ', l
CALL disp(TRIM(str), grids(l)%npt_loc, comm_cart)
END DO
END IF
CALL mpi_reduce(grids(levels)%npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, &
& 0, comm_cart, ierr)
IF(me.EQ.0) THEN
WRITE(*,'(a,2i4)') 'Minimum local npt at coarsest grid:', npt_loc_min
END IF
!
CALL timera(1, 'Grid_construction')
!================================================================================
! 3.0 FD Operator
!
CALL timera(0, 'FD Operator')
!
DO l=1,levels
CALL fdmat(grids(l), fdense, icrosst, grids(l)%fdmat)
CALL ibc_fdmat(grids(l)%fdmat, prb)
END DO
!
CALL timera(1, 'FD Operator')
!================================================================================
! 4.0 RHS and exact solution at the finest grid (l=1)
!
! Allocate memory
!
CALL timera(0, 'RHS and exact sol')
!
s0 = grids(1)%s0; e0 = grids(1)%e0
s = grids(1)%s; e = grids(1)%e
g = [1,1]
v_exact = gvector_2d(s, e, g) ! Exact solutions
errs = gvector_2d(s, e, g) ! Disc. errors
resids = gvector_2d(s, e, g) ! Residuals
ALLOCATE(resid_it(0:nits))
ALLOCATE(rresid(0:nits))
ALLOCATE(err_it(0:nits))
!
! Set RHS at the finest grid and impose Dirichlet/Neuman BC.
!
grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
CALL ibc_rhs(grids(1)%f, s0, e0, prb)
!
! Exact solutions
!
v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
!
CALL timera(1, 'RHS and exact sol')
!================================================================================
! 5.0 MG V-cycle iteration loop
!
! Norm of A and b
!
norma = norm_mat(grids(1)%fdmat, comm_cart)
normb = norm_vec(grids(1)%f, comm_cart)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb
END IF
!
grids(1)%v = 0.0d0
resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
errs = grids(1)%v - v_exact
err_it(0) = norm_vec(errs, comm_cart, root=0)
resid_it(0) = norm_vec(resids, comm_cart)
normv = norm_vec(grids(1)%v, comm_cart)
rresid(0) = resid_it(0) / ( norma*normv + normb )
!
IF(me.EQ.0) THEN
WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
& 'rel. resid', 'ratio'
WRITE(*,'(i4,3(1pe12.3,8X))') 0, resid_it(0), err_it(0), rresid(0)
END IF
!
CALL timera(0, 'MG V-cycle loop')
t_mg = 0.0d0
DO it=1,nits
t_mg0 = mpi_wtime()
CALL mg(grids, info, 1)
t_mg = t_mg + (mpi_wtime()-t_mg0)
resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
errs = grids(1)%v - v_exact
err_it(it) = norm_vec(errs, comm_cart)
resid_it(it) = norm_vec(resids, comm_cart)
normv = norm_vec(grids(1)%v, comm_cart)
rresid(it) = resid_it(it) / ( norma*normv + normb )
ratio_err = err_it(it)/err_it(it-1)
ratio_resid = resid_it(it)/resid_it(it-1)
ratio_rresid= rresid(it)/ rresid(it-1)
IF(me.EQ.0) THEN
WRITE(*,'(i4,3(1pe12.3,0pf8.2))') it, &
& resid_it(it), ratio_resid,&
& err_it(it), ratio_err, &
& rresid(it), ratio_rresid
END IF
IF(resid_it(it) .LE. atol .OR. rresid(it) .LE. rtol .OR. &
& ABS(ratio_err-1._rkind).LT.errtol) THEN
nits = it
EXIT
END IF
END DO
!
CALL timera(1, 'MG V-cycle loop')
CALL mpi_reduce(t_mg, t_mg_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm_cart, ierr)
CALL mpi_reduce(t_mg, t_mg_min, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm_cart, ierr)
IF(me.EQ.0) THEN
WRITE(*,'(a,2(1pe10.3)/)') 'Minmax of MG (only) time (s):', t_mg_min, t_mg_max
END IF
!================================================================================
! 9.0 Epilogue
!
IF(nldebug) THEN
CALL h5file
END IF
!
CALL timera(9, '')
CALL MPI_FINALIZE(ierr)
CONTAINS
!
!+++
FUNCTION fdense(x)
!
! Return density
!
REAL(rkind), INTENT(in) :: x(:)
REAL(rkind) :: fdense(SIZE(x))
fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
END FUNCTION fdense
!+++
FUNCTION fexact(x,y)
!
! Return analytical solution
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: fexact(SIZE(x),SIZE(y))
REAL(rkind) :: c
INTEGER :: j
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
END DO
END IF
END FUNCTION fexact
!+++
FUNCTION frhs(x,y)
!
! Return RHS
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: frhs(SIZE(x),SIZE(y))
REAL(rkind) :: c, s, d(SIZE(x))
REAL(rkind) :: corr
INTEGER :: j
corr = 1.d0+icrosst**2/4.0d0
d(:) = fdense(x(:))
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
& -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
& +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
END DO
END IF
END FUNCTION frhs
!+++!
FUNCTION outerprod(x, y) RESULT(r)
!
! outer product
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: r(SIZE(x),SIZE(y))
INTEGER :: i, j
DO j=1,SIZE(y)
DO i=1,SIZE(x)
r(i,j) = x(i)*y(j)
END DO
END DO
END FUNCTION outerprod
!+++
SUBROUTINE h5file
!
! Result hdf5 file
!
USE futils
CHARACTER(len=128) :: file='ppoisson_fd.h5'
INTEGER :: fid
CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
CALL attach(fid, '/', 'PRB', prb)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'KX', kx)
CALL attach(fid, '/', 'KY', ky)
CALL attach(fid, '/', 'LX', Lx)
CALL attach(fid, '/', 'LY', Ly)
CALL attach(fid, '/', 'ICROSST', icrosst)
CALL attach(fid, '/', 'BETA', beta)
CALL attach(fid, '/', 'MIOME', miome)
CALL attach(fid, '/', 'OMEGA', omega)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'DIRECT_SOLVE_NITS', direct_solve_nits)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'NU1', nu1)
CALL attach(fid, '/', 'NU2', nu2)
CALL attach(fid, '/', 'NU0', nu0)
CALL attach(fid, '/', 'MU', mu)
!
CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO
CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO
!
CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g)
CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g)
CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g)
CALL putarrnd(fid, '/errs', errs%val, (/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/))
!
CALL putarr(fid, '/resid', resid_it(0:nits), ionode=0)
CALL putarr(fid, '/error', err_it(0:nits), ionode=0)
!
CALL putmat(fid, '/MAT', grids(1)%fdmat)
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM main
diff --git a/multigrid/src/stencil_mod.f90 b/multigrid/src/stencil_mod.f90
index 04095c5..cd2618a 100644
--- a/multigrid/src/stencil_mod.f90
+++ b/multigrid/src/stencil_mod.f90
@@ -1,243 +1,243 @@
!>
!> @file stencil_mod.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE stencil
!
! stencil_2d: Implement 2D stencil for matrix-less operations
!
! T.M. Tran, CRPP-EPFL
! August 2013
!
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
!
PRIVATE
PUBLIC :: stencil_2d, init, vmx, putmat, laplacian, &
& OPERATOR(*)
!
TYPE stencil_2d
LOGICAL :: nluni
INTEGER, DIMENSION(2) :: ldim, gdim
INTEGER, DIMENSION(2) :: s0, e0, s, e
INTEGER :: npoints
INTEGER, ALLOCATABLE :: id(:,:)
REAL(rkind), ALLOCATABLE :: val(:,:,:)
END TYPE stencil_2d
!
INTERFACE init
MODULE PROCEDURE init_stencil_2d
END INTERFACE init
INTERFACE vmx
MODULE PROCEDURE vmx_stencila_2d
MODULE PROCEDURE vmx_stencilg_2d
END INTERFACE vmx
INTERFACE putmat
module procedure putmat_stencil
END INTERFACE putmat
!
INTERFACE OPERATOR(*)
MODULE PROCEDURE vmx_stencila_2d
MODULE PROCEDURE vmx_stencilg_2d
END INTERFACE OPERATOR(*)
!
CONTAINS
!================================================================================
SUBROUTINE init_stencil_2d(s, e, id, nluni, mat, comm)
!
! stencil_2d constructor
!
USE mpi
INTEGER, INTENT(in) :: s(2), e(2) ! Bounds in each dim.
INTEGER, INTENT(in) :: id(:,:) ! Structure of stencil
LOGICAL, INTENT(in) :: nluni ! Uniform stencil
TYPE(stencil_2d), INTENT(out) :: mat
INTEGER, INTENT(in) :: comm
INTEGER :: me, ndim=2, ierr
INTEGER :: npoints ! Size of the stencil
!
CALL mpi_comm_rank(comm, me, ierr)
!
IF(id(1,1).NE.0 .AND. id(1,2).NE.0) THEN
IF(me.EQ.0) THEN
WRITE(*,*) 'INIT_STENCIL: id(1,:) should be (0,0)!'
CALL mpi_abort(comm, -1, ierr)
END IF
END IF
!
npoints = SIZE(id,1)
mat%npoints = npoints
mat%s = s
mat%e = e
mat%nluni = nluni
IF(nluni) THEN
ALLOCATE(mat%val(1,1, 0:npoints-1))
ELSE
ALLOCATE(mat%val(s(1):e(1), s(2):e(2), 0:npoints-1))
END IF
ALLOCATE(mat%id(0:npoints-1, ndim))
mat%id(:,:) = id(:,:)
mat%val(:,:,:) = 0.0
!
mat%ldim = e-s+1
CALL mpi_allreduce(mat%s, mat%s0, ndim, MPI_INTEGER, MPI_MIN, comm, ierr)
CALL mpi_allreduce(mat%e, mat%e0, ndim, MPI_INTEGER, MPI_MAX, comm, ierr)
mat%gdim = mat%e0 - mat%s0 + 1
!
END SUBROUTINE init_stencil_2d
!================================================================================
FUNCTION vmx_stencila_2d(mat, xarr) RESULT(res)
!
! Return product res = mat*x, where x and res are simple arrays
!
TYPE(stencil_2d), INTENT(in) :: mat
REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:)
REAL(rkind) :: res(LBOUND(xarr,1):UBOUND(xarr,1), &
& LBOUND(xarr,2):UBOUND(xarr,2))
INTEGER :: k, i, j
INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub
!
smin(:) = mat%s0(:)
emax(:) = mat%e0(:)
res = 0.0
DO k=0,mat%npoints-1
d(:) = mat%id(k,:)
lb = MAX(smin, smin-d, mat%s)
ub = MIN(emax, emax-d, mat%e)
DO j=lb(2),ub(2)
DO i=lb(1),ub(1)
res(i,j) = res(i,j) + mat%val(i,j,k)*xarr(i+d(1),j+d(2))
END DO
END DO
END DO
END FUNCTION vmx_stencila_2d
!================================================================================
FUNCTION vmx_stencilg_2d(mat, xarr) RESULT(res)
!
! Return product res= mat*x, where x and res are gvectors
!
USE gvector, ONLY : gvector_2d
TYPE(stencil_2d), INTENT(in) :: mat
TYPE(gvector_2d), INTENT(in) :: xarr
TYPE(gvector_2d) :: res
INTEGER :: k, i, j
INTEGER, DIMENSION(2) :: d, s, e
!
s = xarr%s
e = xarr%e
res = gvector_2d(xarr%s, xarr%e, xarr%g)
!
! Diagonal contributions: d(0) = (0,0)
DO j=s(2),e(2)
DO i=s(1),e(1)
res%val(i,j) = mat%val(i,j,0)*xarr%val(i,j)
END DO
END DO
!
DO k=1,mat%npoints-1
d(:) = mat%id(k,:)
DO j=s(2),e(2)
DO i=s(1),e(1)
res%val(i,j) = res%val(i,j) + mat%val(i,j,k)*xarr%val(i+d(1),j+d(2))
END DO
END DO
END DO
END FUNCTION vmx_stencilg_2d
!================================================================================
SUBROUTINE putmat_stencil(fid, label, mat, str)
USE futils
INTEGER, INTENT(in) :: fid
CHARACTER(len=*), INTENT(in) :: label
TYPE(stencil_2d), INTENT(in) :: mat
CHARACTER(len=*), INTENT(in), OPTIONAL :: str
!
IF(PRESENT(str)) THEN
CALL creatg(fid, label, str)
ELSE
CALL creatg(fid, label)
END IF
!
CALL putarr(fid, TRIM(label)//'/dists', mat%id, ionode=0)
CALL putarrnd(fid, TRIM(label)//'/val', mat%val, (/1,2/))
END SUBROUTINE putmat_stencil
!=======================================================================
SUBROUTINE laplacian(dx, dy, mat)
!
! Construct a Laplacian using 5-point FD discretization
! Assume homegeneous Dirichlet BC on all 4 faces.
!
REAL(rkind), INTENT(in) :: dx, dy
TYPE(stencil_2d), INTENT(inout) :: mat
!
INTEGER :: i, j, k
INTEGER :: ieast, iwest, jsouth, jnorth
INTEGER, DIMENSION(2) :: d
REAL(rkind) :: dx2inv, dy2inv
!
! Assemble the stencil
!
dx2inv = 1.0d0/dx**2
dy2inv = 1.0d0/dy**2
!
mat%val(:,:,0) = -2.0d0*(dx2inv+dy2inv) ! Diagonal
!
DO k=1,mat%npoints-1 ! Off diagonal
d = mat%id(k,:)
DO j=mat%s(2),mat%e(2)
DO i=mat%s(1),mat%e(1)
IF(d(1).EQ.0) THEN ! north and south
mat%val(i,j,k) = dy2inv
ELSE IF(d(2).EQ.0) THEN ! east and west
mat%val(i,j,k) = dx2inv
END IF
END DO
END DO
END DO
!
! Impose Dirichlet BC on all 4 boundaries
!
ieast = mat%s0(1)
IF(ieast .EQ. mat%s(1)) THEN ! East boundary
mat%val(ieast, mat%s(2):mat%e(2), :) = 0.0
mat%val(ieast, mat%s(2):mat%e(2), 0) = 1.0
END IF
iwest = mat%e0(1)
IF(iwest .EQ. mat%e(1)) THEN ! West boundary
mat%val(iwest, mat%s(2):mat%e(2), :) = 0.0
mat%val(iwest, mat%s(2):mat%e(2), 0) = 1.0
END IF
jsouth = mat%s0(2)
IF(jsouth .EQ. mat%s(2)) THEN ! South boundary
mat%val(mat%s(1):mat%e(1), jsouth, :) = 0.0
mat%val(mat%s(1):mat%e(1), jsouth, 0) = 1.0
END IF
jnorth = mat%e0(2)
IF(jnorth .EQ. mat%e(2)) THEN ! North boundary
mat%val(mat%s(1):mat%e(1), jnorth, :) = 0.0
mat%val(mat%s(1):mat%e(1), jnorth, 0) = 1.0
END IF
!
END SUBROUTINE laplacian
!================================================================================
END MODULE stencil
diff --git a/multigrid/src/test_csr.f90 b/multigrid/src/test_csr.f90
index 02172b2..bd6b2e1 100644
--- a/multigrid/src/test_csr.f90
+++ b/multigrid/src/test_csr.f90
@@ -1,145 +1,145 @@
!>
!> @file test_csr.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test routines of module csr_mod
!
USE multigrid
USE csr
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1, alpha=0, modem=10
DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0
LOGICAL :: nlper=.FALSE.
INTEGER :: ngauss, nterms
INTEGER :: i, j
!
TYPE(grid1d) :: gridx(1)
TYPE(csr_mat) :: mata
!
DOUBLE PRECISION, ALLOCATABLE :: arow(:), sum_row(:)
DOUBLE PRECISION, ALLOCATABLE :: acol(:), sum_col(:)
DOUBLE PRECISION, ALLOCATABLE :: sol(:), rhs(:)
!
NAMELIST /newrun/ nx, nidbas, sigma, kmode, modem, alpha, nlper
!----------------------------------------------------------------------------
READ(*,newrun)
WRITE(*,newrun)
!
! Set grid
!
ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0)
CALL create_grid(nx, nidbas, ngauss, alpha, gridx, nlper)
!
! Create FE matrice and set BC u(0)=u(1)=0
!
nterms = 3
CALL femat(gridx(1)%spl, mata, coefeq, nterms)
CALL to_mat(mata)
WRITE(*,'(/a,2i6)') 'rank, nnz', mata%rank, mata%nnz
WRITE(*,'(a/(12(1pe12.3)))') 'diag', mata%val(mata%idiag)
ALLOCATE(arow(mata%rank))
ALLOCATE(acol(mata%rank))
ALLOCATE(sum_row(mata%rank))
ALLOCATE(sum_col(mata%rank))
sum_col = 0.0d0
DO i=1,mata%rank
CALL getrow(mata, i, arow)
sum_row(i) = SUM(arow)
sum_col = sum_col+arow
IF(i.EQ.1) WRITE(*,'(/a)') 'Matrix A'
WRITE(*,'(12(1pe12.3))') arow
END DO
WRITE(*,'(a/(12(1pe12.3)))') 'sum of row', sum_row
WRITE(*,'(a/(12(1pe12.3)))') 'sum of col', sum_col
DO j=1,mata%rank
CALL getcol(mata, j, acol)
sum_col(j) = SUM(acol)
END DO
WRITE(*,'(a/(12(1pe12.3)))') 'sum of col', sum_col
!
! Clear and rebuild matrix
!
WRITE(*,'(/a)') 'Clear and rebuild matrix ...'
CALL clear_mat(mata)
CALL femat(gridx(1)%spl, mata, coefeq, nterms)
WRITE(*,'(a,2i6)') 'rank, nnz', mata%rank, mata%nnz
DO i=1,mata%rank
CALL getrow(mata, i, arow)
WRITE(*,'(12(1pe12.3))') arow
END DO
WRITE(*,'(a/(12(1pe12.3)))') 'diag', mata%val(mata%idiag)
!
! Test VMX
!
ALLOCATE(sol(mata%rank))
ALLOCATE(rhs(mata%rank))
sol = 1.0d0
!
rhs = vmx(mata, sol)
acol = rhs-sum_row
WRITE(*,'(/a)') 'Test VMX ...'
WRITE(*,'(a/(12(1pe12.3)))') 'amat*sol', rhs
WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol))
!
rhs = vmx(mata, sol, 'T')
acol = rhs-sum_col
WRITE(*,'(a/(12(1pe12.3)))') "amat'*sol", rhs
WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol))
!
CALL destroy(mata)
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
c(1) = 1.0d0
idt(1) = 1
idw(1) = 1
c(2) = sigma
idt(2) = 0
idw(2) = 0
c(3) = 1.0d0
idt(3) = 1
idw(3) = 0
CASE(1)
c(1) = x
idt(1) = 1
idw(1) = 1
c(2) = modem**2/x
idt(2) = 0
idw(2) = 0
c(3) = 1.0d0
idt(3) = 1
idw(3) = 0
CASE default
WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!'
END SELECT
END SUBROUTINE coefeq
!----------------------------------------------------------------------------
END PROGRAM main
diff --git a/multigrid/src/test_gvec1d.f90 b/multigrid/src/test_gvec1d.f90
index 1dd22f5..d67e45a 100644
--- a/multigrid/src/test_gvec1d.f90
+++ b/multigrid/src/test_gvec1d.f90
@@ -1,190 +1,190 @@
!>
!> @file test_gvec1d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Test implementation of 1D vectors with arbirary
! vector bounds and ghost cell width.
!
! T.M. Tran (09/2013)
!
MODULE gvector
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
PRIVATE
PUBLIC :: rkind, gvector_1d, disp, norm2, &
& OPERATOR(+), OPERATOR(-), OPERATOR(*), &
& ASSIGNMENT(=)
TYPE gvector_1d
INTEGER :: s, e, g
REAL(rkind), ALLOCATABLE :: val(:)
END TYPE gvector_1d
INTERFACE gvector_1d
MODULE PROCEDURE constructor
END INTERFACE gvector_1d
INTERFACE OPERATOR(+)
MODULE PROCEDURE add_scal
MODULE PROCEDURE add_vec
END INTERFACE OPERATOR(+)
INTERFACE OPERATOR(-)
MODULE PROCEDURE minus_vec
MODULE PROCEDURE substract_vec
END INTERFACE OPERATOR(-)
INTERFACE OPERATOR(*)
MODULE PROCEDURE scale_left
MODULE PROCEDURE scale_right
END INTERFACE OPERATOR(*)
INTERFACE ASSIGNMENT(=)
MODULE PROCEDURE from_scal
MODULE PROCEDURE from_vec
END INTERFACE ASSIGNMENT(=)
INTERFACE norm2
module procedure norm2_gvector_1d
END INTERFACE norm2
CONTAINS
FUNCTION constructor(s, e, g) RESULT(res)
INTEGER, INTENT(in) :: s, e
INTEGER, OPTIONAL, INTENT(in) :: g
TYPE(gvector_1d) :: res
INTEGER :: lb, ub
res%g= 0
IF(PRESENT(g)) res%g=g
res%s=s
res%e=e
lb = res%s-res%g
ub = res%e+res%g
ALLOCATE(res%val(lb:ub))
res%val = -9999.0
END FUNCTION constructor
FUNCTION add_vec(lhs, rhs) RESULT(res)
TYPE(gvector_1d), INTENT(in) :: lhs, rhs
TYPE(gvector_1d) :: res
res = gvector_1d(lhs%s, lhs%e, lhs%g)
res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs%val(res%s:res%e)
END FUNCTION add_vec
FUNCTION add_scal(lhs, rhs) RESULT(res)
TYPE(gvector_1d), INTENT(in) :: lhs
REAL(rkind), INTENT(in) :: rhs
TYPE(gvector_1d) :: res
res = gvector_1d(lhs%s, lhs%e, lhs%g)
res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs
END FUNCTION add_scal
FUNCTION minus_vec(this) RESULT(res)
TYPE(gvector_1d), INTENT(in) :: this
TYPE(gvector_1d) :: res
res = gvector_1d(this%s, this%e, this%g)
res%val(res%s:res%e) = -this%val(res%s:res%e)
END FUNCTION minus_vec
FUNCTION substract_vec(lhs, rhs) RESULT(res)
TYPE(gvector_1d), INTENT(in) :: lhs, rhs
TYPE(gvector_1d) :: res
res = gvector_1d(lhs%s, lhs%e, lhs%g)
res = lhs + (-rhs)
END FUNCTION substract_vec
FUNCTION scale_left(lhs, rhs) RESULT(res)
REAL(rkind), INTENT(in) :: lhs
TYPE(gvector_1d), INTENT(in) :: rhs
TYPE(gvector_1d) :: res
res = gvector_1d(rhs%s, rhs%e, rhs%g)
res%val(res%s:res%e) = lhs * rhs%val(res%s:res%e)
END FUNCTION scale_left
FUNCTION scale_right(lhs, rhs) RESULT(res)
TYPE(gvector_1d), INTENT(in) :: lhs
REAL(rkind), INTENT(in) :: rhs
TYPE(gvector_1d) :: res
res = gvector_1d(lhs%s, lhs%e, lhs%g)
res%val(res%s:res%e) = rhs * lhs%val(res%s:res%e)
END FUNCTION scale_right
SUBROUTINE from_vec(lhs, rhs)
TYPE(gvector_1d), INTENT(inout) :: lhs
REAL(rkind), INTENT(in) :: rhs(:)
INTEGER :: n
n = lhs%e - lhs%s + 1
IF(SIZE(rhs) .NE. n) THEN
PRINT*, 'from_vec: sizes of rhs and lhs not equal!'
STOP
END IF
lhs%val(lhs%s:lhs%e) = rhs(1:n)
END SUBROUTINE from_vec
SUBROUTINE from_scal(lhs, rhs)
TYPE(gvector_1d), INTENT(inout) :: lhs
REAL(rkind), INTENT(in) :: rhs
lhs%val(lhs%s:lhs%e) = rhs
END SUBROUTINE from_scal
SUBROUTINE disp(str,this)
CHARACTER(len=*), INTENT(in) :: str
TYPE(gvector_1d), INTENT(in) :: this
WRITE(*,'(/a,3i6)') str//': s, e, g =', this%s, this%e, this%g
WRITE(*,'(10(1pe12.3))') this%val
END SUBROUTINE disp
FUNCTION norm2_gvector_1d(this) RESULT(res)
TYPE(gvector_1d), INTENT(in) :: this
REAL(rkind) :: res
res = NORM2(this%val(this%s:this%e))
END FUNCTION norm2_gvector_1d
END MODULE gvector
PROGRAM main
USE gvector
IMPLICIT NONE
INTEGER :: s=0, e=5, g=1
INTEGER :: i, lb, ub
REAL(rkind) :: a=0.1
TYPE(gvector_1d) :: v1, v2, v3
!
lb = s-g
ub = e+g
v1 = gvector_1d(s, e, g)
v1%val(s:e) = [ (i, i=s,e) ]
CALL disp('v1', v1)
!
v2 = v1 + a*v1
CALL disp('v1+a*v1', v2)
!
v3 = v1 - v1*a
CALL disp('v1-v1*a', v3)
!
WRITE(*,'(a,1pe12.3)') 'norm of v1 =', NORM2(v1)
WRITE(*,'(a,1pe12.3)') 'norm of v1-a*v1 =', NORM2(v1-a*v1)
!
v1 = 0.0d0
CALL disp('Should be all zero', v1)
v2 = [ 1.d0, 2.d0, 3.d0, 4.d0, 5.d0, 6.d0 ]
CALL disp('Should be (1. 2. 3. 4. 5. 6.)', v2)
END PROGRAM main
diff --git a/multigrid/src/test_intergrid0.f90 b/multigrid/src/test_intergrid0.f90
index 7a76d32..f08d611 100644
--- a/multigrid/src/test_intergrid0.f90
+++ b/multigrid/src/test_intergrid0.f90
@@ -1,230 +1,230 @@
!>
!> @file test_intergrid0.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! intergrid transfer using *serial* multigrid module:
! - restriction of rhs
! - prolongation of sol
!
USE multigrid, ONLY : grid2d, mg_info, &
& get_lmax, create_grid_fd, ibc_transf, &
& prolong, restrict
IMPLICIT NONE
DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0)
DOUBLE PRECISION :: Lx, Ly, kx, ky, icrosst, beta, miome
INTEGER :: nx, ny, levels
CHARACTER(len=4) :: prb
LOGICAL :: nldebug
!
DOUBLE PRECISION :: dx, dy
DOUBLE PRECISION, ALLOCATABLE :: x(:),y(:)
!
TYPE(mg_info) :: info ! info for MG
TYPE(grid2d), ALLOCATABLE :: grids(:)
!
INTEGER :: i, l
!
NAMELIST /parameters/ prb, nx, ny, levels, Lx, Ly, kx, ky, icrosst, beta, &
& miome, nldebug
!--------------------------------------------------------------------------------
!
! Default inputs
!
nx=32
ny=32
levels = 2
kx=1
ky=1
icrosst=1.0d0
Lx = 1.0D0
Ly = 1.0D0
miome = 200d0
beta = 0d0
prb = 'dddd'
nldebug = .FALSE.
!
READ(*,parameters)
WRITE(*,parameters)
!
! Fine grid
!
dx = lx/REAL(nx,8)
dy = ly/REAL(ny,8)
ALLOCATE(x(0:nx), y(0:ny))
x = dx * [(i,i=0,nx)]
y = dy * [(i,i=0,ny)]
WRITE(*,'(a/10(1pe12.3))') 'x =', x
WRITE(*,'(a/10(1pe12.3))') 'y =', y
!
! Create array of grids
!
levels = MIN(levels, get_lmax(nx), get_lmax(ny))
WRITE(*,'(a,i4)') 'Number of levels', levels
ALLOCATE(grids(levels))
info%nu1 = 1
info%nu2 = 1
info%mu = 1
info%nu0 = 1
info%levels = levels
info%relax = 'jac'
info%omega = 1
CALL create_grid_fd(x, y, grids, info, mat_type='cds', debug=nldebug)
!
! Set BC on grid transfer matrices
!
IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X
CALL ibc_transf(grids,2,3) ! Direction Y
!
! Define RHS at l=1, compute RHS at l=2,...,levels by "restriction".
!
grids(1)%f(:,:) = frhs(grids(1)%x,grids(1)%y)
DO l=2,levels
grids(l)%f = restrict(grids(l)%matp, grids(l-1)%f)
grids(l)%f = 0.25d0*grids(l)%f ! Scaling for FD
END DO
!
! Define SOL at l=levels, compute SOL at l=levels-1,..,1 by "prolongation"
!
grids(levels)%v(:,:) = fsol(grids(levels)%x,grids(levels)%y)
DO l=levels-1,1,-1
grids(l)%v = prolong(grids(l+1)%matp, grids(l+1)%v)
END DO
!
IF(nldebug) THEN
DO l=1,levels
WRITE(*,'(a,i3)') '==== Level', l
WRITE(*,'(a)') 'f ='
DO i=0,grids(l)%n(1)
WRITE(*,'(10f8.3)') grids(l)%f(i,:)
END DO
WRITE(*,'(a)') 'v ='
DO i=0,grids(l)%n(1)
WRITE(*,'(10f8.3)') grids(l)%v(i,:)
END DO
END DO
END IF
!
! Epilogue
!
CALL h5file
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION fdense(x)
!
! Return density
!
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: fdense(SIZE(x))
fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
END FUNCTION fdense
!+++
FUNCTION frhs(x,y)
!
! Return RHS
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c, s, d(SIZE(x))
DOUBLE PRECISION :: corr
INTEGER :: j
corr = 1.d0+icrosst**2/4.0d0
d(:) = fdense(x(:))
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
& -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
& +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
END DO
END IF
END FUNCTION frhs
!+++
FUNCTION fsol(x,y)
!
! Return analytical solution
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c
INTEGER :: j
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
END DO
END IF
END FUNCTION fsol
!+++
SUBROUTINE h5file
USE futils
USE csr, ONLY : putmat
CHARACTER(len=128) :: file='test_intergrid0.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NY', ny)
CALL attach(fid, '/', 'KX', kx)
CALL attach(fid, '/', 'KY', ky)
CALL attach(fid, '/', 'LX', Lx)
CALL attach(fid, '/', 'LY', Ly)
CALL attach(fid, '/', 'BETA', beta)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'PRB', prb)
CALL attach(fid, '/', 'NLDEBUG', nldebug)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
END DO
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM main
diff --git a/multigrid/src/test_intergrid1.f90 b/multigrid/src/test_intergrid1.f90
index 0d0a23b..514ad2f 100644
--- a/multigrid/src/test_intergrid1.f90
+++ b/multigrid/src/test_intergrid1.f90
@@ -1,240 +1,240 @@
!>
!> @file test_intergrid1.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test implementation of (parallel) matrix-free
!
USE iso_fortran_env, ONLY : rkind => real64
USE parmg, ONLY : grid2_type, init_restrict, coarse, get_lmax, &
& exchange, prolong, restrict, disp, norm_vec
USE pputils2, ONLY : dist1d
USE gvector, ONLY : gvector_2d,OPERATOR(-)
USE futils
USE mpi
IMPLICIT NONE
!
INTEGER, PARAMETER :: ndims=2
INTEGER :: ierr, me, npes
INTEGER, DIMENSION(ndims) :: dims=[0,0]
INTEGER, DIMENSION(ndims) :: lmax, coords, comm1d
LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
LOGICAL :: reorder =.FALSE.
INTEGER :: comm_cart, comm_futils
!
INTEGER :: fin
CHARACTER(len=64) :: filein = 'test_intergrid0.h5'
CHARACTER(len=64) :: dsname
CHARACTER(len=4) :: prb
LOGICAL :: nldebug
!
INTEGER :: nx, ny, levels
TYPE(grid2_type), ALLOCATABLE :: grids(:), new_grids(:)
INTEGER, DIMENSION(ndims) :: e, s, npt_glob, npt_loc, npt_loc_min
!
CHARACTER(len=64) :: str
REAL(rkind) :: err
INTEGER :: i, k, l
!--------------------------------------------------------------------------------
! 1.0 Prologue
!
! Init MPI and setup 2D grid topology
!
CALL mpi_init(ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_dims_create(npes, ndims, dims, ierr)
CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
& ierr)
CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
!
IF( me .EQ. 0 ) WRITE(*,'(a,i3,i3)') '2d processor grid', dims
!
! Get nx, ny, levels from h5 file created by test_intergrid0
!
IF( command_argument_count() > 0 ) THEN
CALL get_command_argument(1, filein)
END IF
IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein)
!
CALL mpi_comm_dup(comm_cart, comm_futils, ierr)
CALL openf(filein, fin, mpicomm=comm_futils)
CALL getatt(fin, '/', 'NX', nx, ierr)
CALL getatt(fin, '/', 'NY', ny, ierr)
CALL getatt(fin, '/', 'LEVELS', levels, ierr)
CALL getatt(fin, '/', 'PRB', prb, ierr)
CALL getatt(fin, '/', 'NLDEBUG', nldebug, ierr)
IF(me.EQ.0) WRITE(*,'(a,a,3i5,l3)') 'prb, nx, ny, levels: ', prb, nx, ny, &
& levels, nldebug
!--------------------------------------------------------------------------------
! 2.0 Read (f,v) from h5 file
!
ALLOCATE(grids(levels))
!
! Partition on finest grid
!
npt_glob(1) = nx+1
npt_glob(2) = ny+1
DO i=1,ndims
CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
e(i) = s(i) + npt_loc(i) - 1
lmax(i) = get_lmax(s(i), npt_loc(i), 1, comm1d(i))
END DO
npt_loc = e-s+1
IF(me.EQ.0) WRITE(*,'(a,2i4)') 'lmax', lmax
!
! Partition on coaser grids
!
DO l=1,levels
IF(l.GT.1) THEN
CALL coarse(s,e)
npt_loc = e-s+1
CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, &
& MPI_MIN, comm_cart, ierr)
CALL mpi_allreduce(e, npt_glob, 2, MPI_INTEGER, MPI_MAX, &
& comm_cart, ierr)
npt_glob = npt_glob+1
END IF
WRITE(str,'(a,i3,a)') 'Partition at level', l, ': start. index ='
CALL disp(TRIM(str), s, comm_cart)
IF(me.EQ.0) THEN
WRITE(*,'(a,2i6)') 'npt_glob ', npt_glob
WRITE(*,'(a,2i6)') 'npt_loc_min', npt_loc_min
END IF
grids(l)%s = s
grids(l)%e = e
grids(l)%npt = npt_glob
grids(l)%f = gvector_2d(s, e, [1,1])
grids(l)%v = gvector_2d(s, e, [1,1])
ALLOCATE(grids(l)%x(0:npt_glob(1)-1)) ! Global coords (x,y)
ALLOCATE(grids(l)%y(0:npt_glob(2)-1))
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL getarr(fin, TRIM(dsname)//"/x", grids(l)%x)
CALL getarr(fin, TRIM(dsname)//"/y", grids(l)%y)
CALL getarrnd(fin, TRIM(dsname)//"/v", grids(l)%v%val, [1,2], garea=[1,1])
CALL getarrnd(fin, TRIM(dsname)//"/f", grids(l)%f%val, [1,2], garea=[1,1])
END DO
!--------------------------------------------------------------------------------
! 3.0 Parallel intergrid transfer
!
ALLOCATE(new_grids(levels))
CALL copy_grids(grids, new_grids)
!
! Set up restriction stencil
!
DO l=2,levels
CALL init_restrict(new_grids(l), prb, comm_cart)
END DO
!
! Prolongation of v
!
DO l=levels-1,1,-1
CALL exchange(comm_cart, grids(l+1)%v)
CALL prolong(grids(l+1)%v, new_grids(l)%v)
IF(nldebug) THEN
IF(me.EQ.0) WRITE(*,'(a)') '====='
DO k=0,npes
IF(me.EQ.k) THEN
s = grids(l+1)%f%s
e = grids(l+1)%f%e
WRITE(*,'(a,i2)') 'reference vbar on proc.', me
DO i=s(1),e(1)
WRITE(*,'(10f8.3)') grids(l+1)%v%val(i,s(2):e(2))
END DO
s = grids(l)%f%s
e = grids(l)%f%e
WRITE(*,'(a,i2)') 'reference v on proc.', me
DO i=s(1),e(1)
WRITE(*,'(10f8.3)') grids(l)%v%val(i,s(2):e(2))
END DO
WRITE(*,'(a,i2)') 'compute v on proc.', me
DO i=s(1),e(1)
WRITE(*,'(10f8.3)') new_grids(l)%v%val(i,s(2):e(2))
END DO
END IF
CALL mpi_barrier(comm_cart, ierr)
END DO
END IF
err = norm_vec(new_grids(l)%v-grids(l)%v, comm_cart, 0)
IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of prolongation: ', l, err
END DO
!
! Restriction of f
!
DO l=2,levels
CALL exchange(comm_cart, grids(l-1)%f)
CALL restrict(new_grids(l)%restrict_mat, grids(l-1)%f, new_grids(l)%f)
IF(nldebug) THEN
IF(me.EQ.0) WRITE(*,'(a)') '====='
DO k=0,npes
IF(me.EQ.k) THEN
s = grids(l-1)%f%s
e = grids(l-1)%f%e
WRITE(*,'(a,i2)') 'reference f on proc.', me
DO i=s(1),e(1)
WRITE(*,'(10f8.3)') grids(l-1)%f%val(i,s(2):e(2))
END DO
s = grids(l)%f%s
e = grids(l)%f%e
WRITE(*,'(a,i2)') 'reference fbar on proc.', me
DO i=s(1),e(1)
WRITE(*,'(10f8.3)') grids(l)%f%val(i,s(2):e(2))
END DO
WRITE(*,'(a,i2)') 'compute fbar on proc.', me
DO i=s(1),e(1)
WRITE(*,'(10f8.3)') new_grids(l)%f%val(i,s(2):e(2))
END DO
END IF
CALL mpi_barrier(comm_cart, ierr)
END DO
END IF
err = norm_vec(new_grids(l)%f-grids(l)%f, comm_cart, 0)
IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of restriction: ', l, err
END DO
!--------------------------------------------------------------------------------
! 9.0 Epilogue
!
CALL closef(fin)
CALL mpi_finalize(ierr)
!
CONTAINS
SUBROUTINE copy_grids(g1, g2)
TYPE(grid2_type), INTENT(in) :: g1(:)
TYPE(grid2_type), INTENT(out) :: g2(:)
INTEGER :: l
DO l=1,SIZE(g1)
g2(l)%s = g1(l)%s
g2(l)%e = g1(l)%e
g2(l)%npt_loc = g1(l)%npt_loc
g2(l)%npt = g1(l)%npt
ALLOCATE(g2(l)%x(0:g2(l)%npt(1)-1)); g2(l)%x = g1(l)%x
ALLOCATE(g2(l)%y(0:g2(l)%npt(2)-1)); g2(l)%y = g1(l)%y
g2(l)%v = gvector_2d(g1(l)%v%s, g1(l)%v%e, g1(l)%v%g); g2(l)%v%val = g1(l)%f%val
g2(l)%f = gvector_2d(g1(l)%f%s, g1(l)%f%e, g1(l)%f%g); g2(l)%f%val = g1(l)%f%val
END DO
END SUBROUTINE copy_grids
END PROGRAM main
diff --git a/multigrid/src/test_jacobi.f90 b/multigrid/src/test_jacobi.f90
index f914535..da791e5 100644
--- a/multigrid/src/test_jacobi.f90
+++ b/multigrid/src/test_jacobi.f90
@@ -1,254 +1,254 @@
!>
!> @file test_jacobi.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Test 2D parallel Jacobi using STENCIL_2D matrix-free structure.
!
MODULE mod
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
!
LOGICAL, PARAMETER :: nldebug=.FALSE.
REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
CONTAINS
END MODULE mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PROGRAM main
USE mpi
USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2, timera, hostlist
USE parmg, ONLY : jacobi, get_resids
USE stencil, ONLY : stencil_2d, init, laplacian, putmat
USE mod
IMPLICIT NONE
!
INTEGER, PARAMETER :: ndims=2
!
INTEGER :: me, neighs(4), npes, ierr
INTEGER, DIMENSION(ndims) :: dims=[0,0]
INTEGER, DIMENSION(ndims) :: coords, comm1d
LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
LOGICAL :: reorder =.FALSE.
INTEGER :: comm_cart
!
INTEGER :: nx=4, ny=4 ! Number of intervals
INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc
!
REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:)
REAL(rkind) :: dx, dy
INTEGER, DIMENSION(5,2) :: id ! 5-point stencil
INTEGER :: npoints
TYPE(stencil_2d) :: mat
INTEGER :: i
!
REAL(rkind), ALLOCATABLE :: f(:,:), v(:,:), u(:,:)
REAL(rkind), ALLOCATABLE :: resids(:,:), errs(:,:)
REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:)
REAL(rkind) :: omega=1.0d0, resid
INTEGER :: it, it_skip, nits=100
!
NAMELIST /in/ nx, ny, omega, nits
!================================================================================
! 1.0 Prologue
!
! 2D process grid
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_dims_create(npes, ndims, dims, ierr)
CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
& ierr)
!
CALL mpi_comm_rank(comm_cart, me, ierr)
CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
!
CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
!
CALL hostlist(comm_cart)
IF(me.EQ.0) THEN
WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2)
END IF
!
! Read problem inputs
IF(me.EQ.0) THEN
READ(*,in)
WRITE(*,in)
END IF
!
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
!================================================================================
! 2.0 2d Grid construction
!
! Partition 2D grid
CALL timera(0, 'Grid_construction')
npt_glob(1) = nx+1
npt_glob(2) = ny+1
DO i=1,ndims
CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
e(i) = s(i) + npt_loc(i) - 1
END DO
WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
& (coords(i),s(i),e(i),i=1,ndims)
!
! Global mesh
dx = 1.0d0/REAL(nx)
dy = 1.0d0/REAL(ny)
ALLOCATE(xgrid(0:nx))
ALLOCATE(ygrid(0:ny))
xgrid = [ (i*dx, i=0,nx) ]
ygrid = [ (i*dy, i=0,ny) ]
CALL timera(1, 'Grid_construction')
!================================================================================
! 3.0 FD Laplacian
!
CALL timera(0, 'Laplacian')
id=RESHAPE([ 0, -1, 0, 1, 0, &
0, 0,-1, 0, 1], &
[5,2])
npoints = 5
CALL init(s, e, id, .FALSE., mat, comm_cart)
!
CALL laplacian(dx, dy, mat)
CALL timera(1, 'Laplacian')
!================================================================================
! 4.0 Test parallel Jacobi with \nabla u(x,y) = f(x,y)
!
! Problem definition
!
s = mat%s
e = mat%e
lb = s-1
ub = e+1
ALLOCATE(f(lb(1):ub(1),lb(2):ub(2))) ! RHS
ALLOCATE(v(lb(1):ub(1),lb(2):ub(2))) ! Exact solutions
ALLOCATE(u(lb(1):ub(1),lb(2):ub(2))) ! Computed solutions
ALLOCATE(resids(lb(1):ub(1),lb(2):ub(2))) ! Residuals
ALLOCATE(errs(lb(1):ub(1),lb(2):ub(2))) ! Errors
ALLOCATE(resid_it(0:nits))
ALLOCATE(err_it(0:nits))
!
f(s(1):e(1),s(2):e(2)) = rhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
v(s(1):e(1),s(2):e(2)) = exact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
CALL exchange(comm_cart, f)
CALL exchange(comm_cart, v)
!
! Residuals of exact solutions
resids = get_resids(mat,v,f)
resid = norm2_vec(resids, comm_cart)
!
! Jacobi iteration loop
!
IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error'
u = 0.0d0
CALL exchange(comm_cart, u)
resids = get_resids(mat,u,f)
errs = u-v
resid_it(0) = norm2_vec(resids, comm_cart)
err_it(0) = norm2_vec(errs, comm_cart)
it_skip = MAX(1,nits/10)
!
CALL timera(0, 'Jacobi')
DO it=1,nits
CALL jacobi(mat, omega, 1, u, f)
CALL exchange(comm_cart, u)
resids = get_resids(mat,u,f)
errs = u-v
resid_it(it) = norm2_vec(resids, comm_cart)
err_it(it) = norm2_vec(errs, comm_cart)
IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN
WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),&
& err_it(it), err_it(it)/err_it(it-1)
END IF
END DO
CALL timera(1, 'Jacobi')
!================================================================================
! 9.0 Epilogue
CALL h5file
!
CALL timera(9, '')
CALL MPI_FINALIZE(ierr)
CONTAINS
SUBROUTINE disp(str, arr)
CHARACTER(len=*), INTENT(in) :: str
REAL(rkind), INTENT(in) :: arr(:,:)
INTEGER :: j
WRITE(*,'(/a)') str
DO j=1,SIZE(arr,2)
WRITE(*,'(10f8.3)') arr(:,j)
END DO
END SUBROUTINE disp
!
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_jacobi.h5'
INTEGER :: fid
CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO
CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO
!
CALL putarrnd(fid, '/f', f, (/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/v', v, (/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/u', u, (/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/errs', errs, (/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/resids', resids,(/1,2/), garea=(/1,1/))
!
CALL putarr(fid, '/resid', resid_it, ionode=0)
CALL putarr(fid, '/error', err_it, ionode=0)
!
CALL putmat(fid, '/MAT', mat)
CALL closef(fid)
END SUBROUTINE h5file
!
FUNCTION outerprod(x, y) RESULT(r)
!
! outer product
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: r(SIZE(x),SIZE(y))
INTEGER :: i, j
DO j=1,SIZE(y)
DO i=1,SIZE(x)
r(i,j) = x(i)*y(j)
END DO
END DO
END FUNCTION outerprod
!
FUNCTION rhs(x,y)
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: rhs(SIZE(x),SIZE(y))
rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
END FUNCTION rhs
!
FUNCTION exact(x,y)
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: exact(SIZE(x),SIZE(y))
exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
END FUNCTION exact
END PROGRAM main
diff --git a/multigrid/src/test_jacobig.f90 b/multigrid/src/test_jacobig.f90
index 1fdca83..ff71a3e 100644
--- a/multigrid/src/test_jacobig.f90
+++ b/multigrid/src/test_jacobig.f90
@@ -1,331 +1,331 @@
!>
!> @file test_jacobig.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!
! Test 2D parallel Jacobi using STENCIL_2D matrix-free structure.
!
MODULE mod
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
!
LOGICAL, PARAMETER :: nldebug=.FALSE.
REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
CONTAINS
END MODULE mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PROGRAM main
USE mpi
USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs
USE pputils2, ONLY : dist1d, timera, hostlist
USE gvector, ONLY : gvector_2d, ASSIGNMENT(=), OPERATOR(-)
USE parmg, ONLY : grid2_type, create_grid, jacobi, exchange, get_resids, norm_vec
USE stencil, ONLY : stencil_2d, putmat
USE mod
IMPLICIT NONE
!
INTEGER, PARAMETER :: ndims=2
!
INTEGER :: me, neighs(4), npes, ierr
INTEGER, DIMENSION(ndims) :: dims=[0,0]
INTEGER, DIMENSION(ndims) :: coords, comm1d
LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
LOGICAL :: reorder =.FALSE.
INTEGER :: comm_cart
INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc
!
REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:)
INTEGER, ALLOCATABLE :: id(:,:)
REAL(rkind) :: dx, dy
INTEGER :: npoints ! Number of points in FD stencil
!
TYPE(gvector_2d) :: v_exact, resids, errs
REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:)
INTEGER, DIMENSION(ndims) :: g
INTEGER :: i, it, it_skip
!
INTEGER :: levels=1
TYPE(grid2_type), ALLOCATABLE :: grids(:)
!
! Input quantities
!
CHARACTER(len=4) :: prb='dddd'
INTEGER :: nx=4, ny=4 ! Number of intervals
INTEGER :: kx=1, ky=1
REAL(rkind) :: Lx=1.0, Ly=1.0
REAL(rkind) :: icrosst=1.0, beta=0.0, miome=200.0
REAL(rkind) :: omega=1.0d0
INTEGER :: nits=100, nu=1
!
NAMELIST /in/ prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, &
& miome, omega, nits, nu
!================================================================================
! 1.0 Prologue
!
! 2D process grid
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_dims_create(npes, ndims, dims, ierr)
CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
& ierr)
!
CALL mpi_comm_rank(comm_cart, me, ierr)
CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
!
CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
!
CALL hostlist(comm_cart)
IF(me.EQ.0) THEN
WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2)
END IF
!
! Read problem inputs
!
IF(me.EQ.0) THEN
READ(*,in)
WRITE(*,in)
END IF
CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, comm_cart, ierr)
CALL mpi_bcast(kx, 1, MPI_INTEGER, 0, comm_cart, ierr)
call mpi_bcast(ky, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(icrosst, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
CALL mpi_bcast(Lx,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
CALL mpi_bcast(Ly,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
CALL mpi_bcast(beta,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
CALL mpi_bcast(miome, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(nu, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
!================================================================================
! 2.0 2d Grid construction
!
! Partition 2D grid
!
CALL timera(0, 'Grid_construction')
npt_glob(1) = nx+1
npt_glob(2) = ny+1
DO i=1,ndims
CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
e(i) = s(i) + npt_loc(i) - 1
END DO
WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
& (coords(i),s(i),e(i),i=1,ndims)
!
! Global mesh
!
dx = Lx/REAL(nx)
dy = Ly/REAL(ny)
ALLOCATE(xgrid(0:nx))
ALLOCATE(ygrid(0:ny))
xgrid = [ (i*dx, i=0,nx) ]
ygrid = [ (i*dy, i=0,ny) ]
CALL timera(1, 'Grid_construction')
!
! Create grid structure
!
ALLOCATE(grids(levels))
npoints = 9 ! Size of FD stencil
ALLOCATE(id(npoints,2))
id=RESHAPE([ 0, -1, 0, 1, -1, 1, -1, 0, 1, &
0, -1, -1, -1, 0, 0, 1, 1, 1], &
[npoints,2])
CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart)
!================================================================================
! 3.0 FD Operator
!
CALL timera(0, 'Laplacian')
!
CALL fdmat(grids(1), fdense, icrosst, grids(1)%fdmat)
CALL ibc_fdmat(grids(1)%fdmat, prb)
!
CALL timera(1, 'Laplacian')
!================================================================================
! 4.0 RHS and exact solution
!
! Allocate memory
!
s0 = grids(1)%s0; e0 = grids(1)%e0
s = grids(1)%s; e = grids(1)%e
g = [1,1]
v_exact = gvector_2d(s, e, g) ! Exact solutions
errs = gvector_2d(s, e, g) ! Disc. errors
resids = gvector_2d(s, e, g) ! Residuals
ALLOCATE(resid_it(0:nits))
ALLOCATE(err_it(0:nits))
!
! Set RHS at the finest grid. Impose Dirichlet BC.
!
grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
CALL ibc_rhs(grids(1)%f, s0, e0, prb)
!
! Exact solutions
!
v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
!================================================================================
! 5.0 Jacobi iteration loop
!
IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error'
grids(1)%v = 0.0d0
resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
errs = grids(1)%v - v_exact
resid_it(0) = norm_vec(resids, comm_cart, root=0)
err_it(0) = norm_vec(errs, comm_cart, root=0)
it_skip = MAX(1,nits/10)
!
CALL timera(0, 'Jacobi')
DO it=1,nits
CALL jacobi(comm_cart, grids(1)%fdmat, omega, nu, grids(1)%v, grids(1)%f)
resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
errs = grids(1)%v - v_exact
resid_it(it) = norm_vec(resids, comm_cart, root=0)
err_it(it) = norm_vec(errs, comm_cart, root=0)
IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN
WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),&
& err_it(it), err_it(it)/err_it(it-1)
END IF
END DO
CALL timera(1, 'Jacobi')
!================================================================================
! 9.0 Epilogue
CALL h5file
!
CALL timera(9, '')
CALL MPI_FINALIZE(ierr)
CONTAINS
!
!+++
FUNCTION fdense(x)
!
! Return density
!
REAL(rkind), INTENT(in) :: x(:)
REAL(rkind) :: fdense(SIZE(x))
fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
END FUNCTION fdense
!+++
FUNCTION fexact(x,y)
!
! Return analytical solution
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: fexact(SIZE(x),SIZE(y))
REAL(rkind) :: c
INTEGER :: j
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = SIN(2.0d0*pi*ky*y(j)/Ly)
fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
END DO
END IF
END FUNCTION fexact
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_jacobig.h5'
INTEGER :: fid
CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO
CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO
!
CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g)
CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g)
CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g)
CALL putarrnd(fid, '/errs', errs%val, (/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/))
!
CALL putarr(fid, '/resid', resid_it, ionode=0)
CALL putarr(fid, '/error', err_it, ionode=0)
!
CALL putmat(fid, '/MAT', grids(1)%fdmat)
CALL closef(fid)
END SUBROUTINE h5file
!+++
FUNCTION frhs(x,y)
!
! Return RHS
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: frhs(SIZE(x),SIZE(y))
REAL(rkind) :: c, s, d(SIZE(x))
REAL(rkind) :: corr
INTEGER :: j
corr = 1.d0+icrosst**2/4.0d0
d(:) = fdense(x(:))
IF(prb.EQ.'dddd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
& -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
END DO
ELSE IF (prb.EQ.'nndd') THEN
DO j=1,SIZE(y)
c = COS(2.0d0*pi*ky*y(j)/Ly)
s = SIN(2.0d0*pi*ky*y(j)/Ly)
frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
& +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
& + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
END DO
END IF
END FUNCTION frhs
!+++!
FUNCTION outerprod(x, y) RESULT(r)
!
! outer product
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: r(SIZE(x),SIZE(y))
INTEGER :: i, j
DO j=1,SIZE(y)
DO i=1,SIZE(x)
r(i,j) = x(i)*y(j)
END DO
END DO
END FUNCTION outerprod
!
FUNCTION rhs(x,y)
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: rhs(SIZE(x),SIZE(y))
rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
END FUNCTION rhs
!
FUNCTION exact(x,y)
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: exact(SIZE(x),SIZE(y))
exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
END FUNCTION exact
END PROGRAM main
diff --git a/multigrid/src/test_mg.f90 b/multigrid/src/test_mg.f90
index 8275f7a..7a90f57 100644
--- a/multigrid/src/test_mg.f90
+++ b/multigrid/src/test_mg.f90
@@ -1,279 +1,279 @@
!>
!> @file test_mg.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test multigrid V-cycle
!
USE multigrid
USE math_util, ONLY : root_bessj
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1, ngauss=2, alpha=0, nits=40
INTEGER :: modem=22, modep=10
INTEGER :: levels=2, nu1=1, nu2=1, mu=1, nu0=1
CHARACTER(len=4) :: relax='jac '
LOGICAL :: nlfixed = .FALSE.
DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
DOUBLE PRECISION :: omega=2.0d0/3.0d0
INTEGER :: l, nrank, its
DOUBLE PRECISION :: errdisc_dir
DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:)
DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:)
DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:)
!
TYPE(grid1d), ALLOCATABLE :: gridx(:)
TYPE(mg_info) :: info
!
NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha, &
& relax, omega, nits, nlfixed, levels, nu1, nu2, mu, nu0
!--------------------------------------------------------------------------------
! 1. Prologue
! Inputs
!
READ(*,newrun)
WRITE(*,newrun)
!
levels = MIN(levels, get_lmax(nx))
!
info%nu1 = nu1
info%nu2 = nu2
info%mu = mu
info%nu0 = nu0
info%levels = levels
info%relax = relax
info%omega = omega
!
! Create grids
!
ALLOCATE(gridx(levels))
CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
!
! Create FE matrice and set BC u(0)=u(1)=0
!
DO l=1,levels
CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq)
!
! Left Dirichlet BC (only for Cartesian geometry)
IF(alpha .EQ. 0) THEN
CALL ibcmat(1, gridx(l)%mata)
END IF
!
! Right Dirichlet BC
CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata)
!
! BC on grid transfer operator
IF(l.GT.1) THEN
WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0
IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0
gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0
END IF
END DO
CALL printdiag_gb('Diagonal of coarsest A', gridx(levels)%mata)
!
! Construct RHS and set BC only on the finest grid
!
nrank = gridx(1)%rank
CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
!
! Left Dirichlet BC (only for Cartesian geometry)
IF(alpha .EQ. 0) THEN
gridx(1)%f(1) = 0.0d0
END IF
!
! Right Dirichlet BC
gridx(1)%f(nrank) = 0.0d0
!--------------------------------------------------------------------------------
! 2. Direct solution
!
WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
ALLOCATE(u_direct(0:nx))
ALLOCATE(sol_direct(nrank))
CALL direct_solve(gridx(1), sol_direct)
CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct)
errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol)
WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
!--------------------------------------------------------------------------------
! 3. Solution from MG V-cycles
!
WRITE(*,'(//a)') 'Multigrid MG V-cycles'
ALLOCATE(sol_calc(nrank))
ALLOCATE(err(0:nits))
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
!
! Initial guess
!
sol_calc(:) = 0.0d0
IF(nlfixed) THEN
sol_calc(:) = sol_direct(:)
END IF
gridx(1)%v(:) = sol_calc(:)
err(0) = normf(gridx(1)%matm, sol_calc-sol_direct)
errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol)
resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
!
! Iterations
!
DO its=1,nits
CALL mg(gridx, info, 1)
sol_calc(:) = gridx(1)%v(:)
err(its) = normf(gridx(1)%matm, sol_calc-sol_direct)
errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol)
resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
END DO
!
WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', &
& 'disc. err', 'ratio'
WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0)
WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), &
& resid(its), resid(its)/resid(its-1), &
& errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits)
!--------------------------------------------------------------------------------
! 4. Solution from FMG
!
WRITE(*,'(//a)') 'Full Multigrid'
ALLOCATE(errdisc_fmg(nits))
DO its=1,nits
info%nu0 = its
CALL fmg(gridx, info, 1)
sol_calc(:) = gridx(1)%v(:)
errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_calc, sol)
resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
END DO
WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio'
WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), &
& errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits)
!
! Grid values at final iteration
!
ALLOCATE(u_exact(0:nx))
ALLOCATE(u_calc(0:nx))
u_exact = sol(gridx(1)%x)
CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc)
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
CALL h5file
!--------------------------------------------------------------------------------
CONTAINS
FUNCTION rhs(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: nump
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
rhs = SIN(pi*kmode*x)
CASE(1) ! Cylindrical
nump = root_bessj(modem, modep)
rhs = x * nump**2 * bessel_jn(modem, nump*x)
END SELECT
END FUNCTION rhs
FUNCTION sol(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sol(SIZE(x))
DOUBLE PRECISION :: nump
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
CASE(1) ! Cylindrical
nump = root_bessj(modem, modep)
sol(:) = bessel_jn(modem, nump*x(:))
END SELECT
END FUNCTION sol
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
c(1) = 1.0d0
idt(1) = 1
idw(1) = 1
c(2) = sigma
idt(2) = 0
idw(2) = 0
CASE(1) ! Cylindrical
c(1) = x
idt(1) = 1
idw(1) = 1
c(2) = REAL(modem,8)**2/x
idt(2) = 0
idw(2) = 0
END SELECT
END SUBROUTINE coefeq
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_mg.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'KMODE', kmode)
CALL attach(fid, '/', 'MODEM', modem)
CALL attach(fid, '/', 'MODEP', modep)
CALL attach(fid, '/', 'ALPHA', alpha)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'OMEGA', omega)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'NLFIXED', nlfixed)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'NU1', nu1)
CALL attach(fid, '/', 'NU2', nu2)
CALL attach(fid, '/', 'NU0', nu0)
CALL attach(fid, '/', 'MU', mu)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata)
IF(l.GT.1) THEN
CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
END IF
CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
END DO
CALL creatg(fid, '/Iterations')
CALL putarr(fid, '/Iterations/errors', err)
CALL putarr(fid, '/Iterations/residues', resid)
CALL putarr(fid, '/Iterations/disc_errors', errdisc)
CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg)
CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x)
CALL putarr(fid, '/Iterations/u_direct', u_direct)
CALL putarr(fid, '/Iterations/u_exact', u_exact)
CALL putarr(fid, '/Iterations/u_calc', u_calc)
CALL closef(fid)
END SUBROUTINE h5file
END PROGRAM main
diff --git a/multigrid/src/test_mg2d.f90 b/multigrid/src/test_mg2d.f90
index 4730a65..ffa9e6d 100644
--- a/multigrid/src/test_mg2d.f90
+++ b/multigrid/src/test_mg2d.f90
@@ -1,413 +1,413 @@
!>
!> @file test_mg2d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test 2d multigrid: Cartesian case
!
USE multigrid
USE csr
IMPLICIT NONE
INCLUDE 'mpif.h'
!
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
CHARACTER(len=4) :: prb='poly'
INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits
CHARACTER(len=4) :: relax='jac '
DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol
LOGICAL :: nlfixed=.FALSE.
DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step
DOUBLE PRECISION :: resid_direct, errdisc_direct
DOUBLE PRECISION :: norma, normb
!
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy
!
DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:)
DOUBLE PRECISION, POINTER :: sol_direct_1d(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
!
INTEGER :: ierr, me
INTEGER :: l, nterms
INTEGER :: its
!
TYPE(grid2d), ALLOCATABLE :: grids(:)
TYPE(mg_info) :: info
!
NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, prb, &
& nu1, nu2, mu, nu0, relax, nits, tol, nlfixed, omega
!--------------------------------------------------------------------------------
! 1. Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Inputs
!
n = (/8, 8/)
nidbas=(/3,3/)
ngauss=(/2,2/)
alpha = (/0,0/)
kx=4
ky=3
sigma=10.0d0
levels=2
prb='poly'
nu1 = 1
nu2 = 1
mu = 1
nu0 = 1
nits = 10
tol = 1.e-8
relax = 'jac'
nlfixed= .FALSE.
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
!
! Adjust number of levels and fill mg info.
!
levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
info%nu1 = nu1
info%nu2 = nu2
info%mu = mu
info%nu0 = nu0
info%levels = levels
info%relax = relax
info%omega = omega
!
! Create grids
!
t0 = mpi_wtime()
!
dx = 1.0d0/REAL(n(1),8)
dy = 1.0d0/REAL(n(2),8)
ALLOCATE(x(0:n(1)), y(0:n(2)))
DO ix=0,n(1)
x(ix) = ix*dx
END DO
DO iy=0,n(2)
y(iy) = iy*dy
END DO
!
ALLOCATE(grids(levels))
CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
!
! Construct RHS and set BC only on the finest grid
!
CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
CALL ibcrhs(grids(1), grids(1)%f)
!
! Build FE matrices and set BC
!
nterms = 3
DO l=1,levels
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
CALL ibcmat(grids(l), grids(l)%mata)
CALL to_mat(grids(l)%mata)
END DO
!
! Set BC on grid transfer matrices
!
CALL ibc_transf(grids,1,3)
CALL ibc_transf(grids,2,3)
!
tsetup(1) = mpi_wtime()-t0
!
! Clear and rebuild FE matrices and set BC
!
t0 = mpi_wtime()
nterms = 2
DO l=1,levels
CALL clear_mat(grids(l)%mata)
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.)
CALL ibcmat(grids(l), grids(l)%mata)
END DO
tsetup(2) = mpi_wtime()-t0
!--------------------------------------------------------------------------------
! 1. Analytical solution (at the finest grid, l=1)
!
ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
sol_anal_grid = sol(grids(1)%x, grids(1)%y)
!--------------------------------------------------------------------------------
! 2. Direct solution (at the finest grid, l=1)
!
WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
!
ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
& source=grids(1)%f)
ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
!
sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
!
!
PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
!
t0 = mpi_wtime()
sol_direct = grids(1)%f
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
tdirect = mpi_wtime()-t0
!
t0 = mpi_wtime()
sol_direct = grids(1)%f
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
errdisc_direct = disc_err(grids(1)%spl, sol_direct, sol)
!
tbsolve = mpi_wtime()-t0
!
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
& [0,0], sol_direct)
!
WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
& errdisc_direct, resid_direct
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
!
WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', matnorm(grids(1)%mata)
WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf')
WRITE(*, '(a,1pe12.3)') '1 norm of A ', matnorm(grids(1)%mata, '1')
!--------------------------------------------------------------------------------
! 3. Test multigrid V-cycle
!
WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
ALLOCATE(sol_calc_grid(0:n(1),0:n(2)))
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
ALLOCATE(rresid(0:nits))
!
! Norm of A and b
!
norma = matnorm(grids(1)%mata)
normb = NORM2(grids(1)%f1d)
!
! Initial guess
!
IF(nlfixed) THEN
grids(1)%v = sol_direct
ELSE
grids(1)%v = 0.0d0
END IF
!
errdisc(0) = disc_err(grids(1)%spl, grids(1)%v, sol)
resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
rresid(0) = resid(0) / ( norma*NORM2(grids(1)%v1d) + normb )
WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
& 'rel. resid', 'ratio'
WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0)
!
! Iterations
!
t0 = mpi_wtime()
DO its=1,nits
CALL mg(grids, info, 1)
errdisc(its) = disc_err(grids(1)%spl, grids(1)%v, sol)
resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
rresid(its) = resid(its) / ( norma*NORM2(grids(1)%v1d) + normb )
WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') its, &
& resid(its), resid(its)/resid(its-1), &
& errdisc(its), errdisc(its)/errdisc(its-1), &
& rresid(its), rresid(its)/rresid(its-1)
IF(resid(its) .LE. tol) EXIT
END DO
nits = MIN(nits,its)
titer = mpi_wtime() - t0
titer_per_step = titer/REAL(its,8)
!
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, &
& [0,0], grids(1)%v)
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Display timing
!
WRITE(*,'(a,2(1pe12.3))') 'Set up time (s) ', tsetup
WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s) ', tdirect, tbsolve
WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s) ', titer, nits
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
CALL mpi_finalize(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION rhs(x, y)
!
! Return problem RHS
!
DOUBLE PRECISION, INTENT(in) :: x, y
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: x2, y2
SELECT CASE(TRIM(prb))
CASE('poly')
x2 = x*x; y2 = y*y;
rhs = 2.d0 * ( (1.0d0-6.d0*x2)*y2*(1.d0-y2) + &
& (1.0d0-6.d0*y2)*x2*(1.d0-x2) )
CASE('trig')
rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
END SELECT
END FUNCTION rhs
!+++
FUNCTION sol(x, y)
!
! Return exact problem solution
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c
DOUBLE PRECISION :: x2(SIZE(x)), y2(SIZE(y))
INTEGER :: j
SELECT CASE(TRIM(prb))
CASE('poly')
x2 = x*x; y2 = y*y;
DO j=1,SIZE(y)
c = y2(j)*(y2(j)-1.d0)
sol(:,j) = c*x2(:)*(1.0d0-x2(:))
END DO
CASE('trig')
DO j=1,SIZE(y)
c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
sol(:,j) = c * SIN(PI*kx*x(:))
END DO
END SELECT
END FUNCTION sol
!+++
SUBROUTINE coefeq(x, y, idt, idw, c)
!
! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
!
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = 1.0d0
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.0d0
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
!
c(3) = sigma
idt(3,1) = 0
idt(3,2) = 0
idw(3,1) = 0
idw(3,2) = 0
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_mg2d.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', n(1))
CALL attach(fid, '/', 'NY', n(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'KX', kx)
CALL attach(fid, '/', 'KY', ky)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'ALPHA1', alpha(1))
CALL attach(fid, '/', 'ALPHA2', alpha(2))
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'NU1', nu1)
CALL attach(fid, '/', 'NU2', nu2)
CALL attach(fid, '/', 'NU0', nu0)
CALL attach(fid, '/', 'MU', mu)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/anal', sol_anal_grid)
CALL putarr(fid, '/solutions/calc', sol_calc_grid)
CALL putarr(fid, '/solutions/direct', sol_direct_grid)
!
CALL creatg(fid, '/Iterations')
CALL putarr(fid, '/Iterations/residues', resid(0:nits))
CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM
diff --git a/multigrid/src/test_mg2d_cyl.f90 b/multigrid/src/test_mg2d_cyl.f90
index 73644ec..34967dc 100644
--- a/multigrid/src/test_mg2d_cyl.f90
+++ b/multigrid/src/test_mg2d_cyl.f90
@@ -1,427 +1,427 @@
!>
!> @file test_mg2d_cyl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test 2d multigrid
! Cylindrical case
!
USE multigrid
USE csr
IMPLICIT NONE
INCLUDE 'mpif.h'
!
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
INTEGER, DIMENSION(2) :: n, nidbas, ngauss
INTEGER :: modem=22, modep=10
CHARACTER(len=4) :: prb='poly'
INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits
CHARACTER(len=4) :: relax='jac '
DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol, rtol
LOGICAL :: nluniq=.TRUE.
LOGICAL :: nlfixed=.FALSE.
DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step
DOUBLE PRECISION :: resid_direct, errdisc_direct
DOUBLE PRECISION :: norma, normb
!
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy
!
DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:)
DOUBLE PRECISION, POINTER :: sol_direct_1d(:)
!
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
!
DOUBLE PRECISION, ALLOCATABLE :: sol_orig(:,:)
!
INTEGER :: ierr, me
INTEGER :: l, nterms
INTEGER :: its
!
TYPE(grid2d), ALLOCATABLE :: grids(:)
TYPE(mg_info) :: info
!
NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, &
& nu1, nu2, mu, nu0, relax, nits, tol, rtol, nlfixed, &
& nluniq, omega
!--------------------------------------------------------------------------------
! 1. Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Inputs
!
n = (/8, 8/)
nidbas=(/3,3/)
ngauss=(/2,2/)
modem = 22
modep = 10
prb='poly'
levels=2
nu1 = 1
nu2 = 1
mu = 1
nu0 = 1
nits = 10
tol = 1.e-8
rtol = 1.e-10
relax = 'jac'
nlfixed= .FALSE.
nluniq = .TRUE.
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
!
! Adjust number of levels and fill mg info.
!
levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
info%nu1 = nu1
info%nu2 = nu2
info%mu = mu
info%nu0 = nu0
info%levels = levels
info%relax = relax
info%omega = omega
!
! Create grids
!
t0 = mpi_wtime()
!
dx = 1.0d0/REAL(n(1),8)
dy = 2.0d0*pi/REAL(n(2),8)
ALLOCATE(x(0:n(1)), y(0:n(2)))
DO ix=0,n(1)
x(ix) = ix*dx
END DO
DO iy=0,n(2)
y(iy) = iy*dy
END DO
!
ALLOCATE(grids(levels))
CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
& debug_in=.FALSE.)
WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
!!$ CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1))
!!$ CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2))
!
! Construct RHS and set BC only on the finest grid
!
CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
!!$ WRITE(*,'(a/(8(1pe12.3)))') 'Orig RHS at the axis', grids(1)%f(1,1:n(2))
CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq)
!!$ WRITE(*,'(a/(8(1pe12.3)))') 'RHS at the axis', grids(1)%f(1,1:n(2))
!
! Build FE matrices and set BC
!
nterms = 2
DO l=1,levels
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
CALL to_mat(grids(l)%mata)
END DO
!
! Set BC on grid transfer matrices
!
CALL ibc_transf(grids, 1, 2) ! Only right boundary on r (1st dim.)
tsetup(1) = mpi_wtime()-t0
!
! Clear and rebuild FE matrices and set BC
!
t0 = mpi_wtime()
nterms = 2
DO l=1,levels
CALL clear_mat(grids(l)%mata)
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.)
CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
END DO
tsetup(2) = mpi_wtime()-t0
!--------------------------------------------------------------------------------
! 1. Analytical solution (at the finest grid, l=1)
!
ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
sol_anal_grid = sol(grids(1)%x, grids(1)%y)
!--------------------------------------------------------------------------------
! 2. Direct solution (at the finest grid, l=1)
!
WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
!
ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
& source=grids(1)%f)
ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
!
sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
!
!
PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
!
t0 = mpi_wtime()
sol_direct = grids(1)%f
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
tdirect = mpi_wtime()-t0
!
t0 = mpi_wtime()
sol_direct = grids(1)%f
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
!
sol_direct_orig = sol_direct
CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq)
errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol)
!
tbsolve = mpi_wtime()-t0
!
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
& [0,0], sol_direct_orig)
!
WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
& errdisc_direct, resid_direct
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
!--------------------------------------------------------------------------------
! 3. Test multigrid V-cycle
!
WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
ALLOCATE(sol_calc_grid(0:n(1),0:n(2)))
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
ALLOCATE(rresid(0:nits))
ALLOCATE(sol_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
!
! Norm of A and b
!
norma = matnorm(grids(1)%mata)
normb = NORM2(grids(1)%f1d)
!
WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', norma
WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf')
WRITE(*, '(a,1pe12.3)') '1 norm of A ', matnorm(grids(1)%mata, '1')
!
! Initial guess
!
IF(nlfixed) THEN
grids(1)%v = sol_direct
ELSE
grids(1)%v = 0.0d0
END IF
!
sol_orig(:,:) = grids(1)%v(:,:)
CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq)
errdisc(0) = disc_err(grids(1)%spl, sol_orig, sol)
!
resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
rresid(0) = resid(0) / ( norma*NORM2(grids(1)%v1d) + normb )
WRITE(*,'(a4,3(a12,a8),a12)') 'its', 'residue', 'ratio', 'disc. err', &
& 'ratio', 'rel. resid', 'ratio', '||v||'
WRITE(*,'(i4,3(1pe12.3,8x),1pe12.3)') 0, resid(0), errdisc(0), rresid(0), NORM2(grids(1)%v1d)
!
! Iterations
!
t0 = mpi_wtime()
DO its=1,nits
CALL mg_cyl(grids, info, 1, nluniq_in=nluniq)
resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
rresid(its) = resid(its) / ( norma*NORM2(grids(1)%v1d) + normb )
!
sol_orig(:,:) = grids(1)%v(:,:)
CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq)
errdisc(its) = disc_err(grids(1)%spl, sol_orig, sol)
!
WRITE(*,'((i4,3(1pe12.3,0pf8.2)),1pe12.3)') its, &
& resid(its), resid(its)/resid(its-1), &
& errdisc(its), errdisc(its)/errdisc(its-1), &
& rresid(its), rresid(its)/rresid(its-1), &
& NORM2(grids(1)%v1d)
IF(resid(its) .LE. tol .OR. rresid(its).LE. rtol ) EXIT
END DO
nits = MIN(nits,its)
titer = mpi_wtime() - t0
titer_per_step = titer/REAL(its,8)
!
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, &
& [0,0], sol_orig)
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Display timing
!
WRITE(*,'(a,2(1pe12.3))') 'Set up time (s) ', tsetup
WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s) ', tdirect, tbsolve
WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s) ', titer, nits
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
CALL mpi_finalize(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION rhs(r, theta)
!
! Return problem RHS
!
USE math_util, ONLY : root_bessj
DOUBLE PRECISION, INTENT(in) :: r, theta
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: nump
!
SELECT CASE(TRIM(prb))
CASE('poly')
rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
CASE('bess')
nump = root_bessj(modem, modep)
rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
END SELECT
END FUNCTION rhs
!+++
FUNCTION sol(r, theta)
!
! Return exact problem solution
!
USE math_util, ONLY : root_bessj
DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
DOUBLE PRECISION :: nump
INTEGER :: j
!
SELECT CASE(TRIM(prb))
CASE('poly')
DO j=1,SIZE(theta)
sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
END DO
CASE('bess')
nump = root_bessj(modem, modep)
DO j=1,SIZE(theta)
sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
END DO
END SELECT
END FUNCTION sol
!+++
SUBROUTINE coefeq(r, theta, idt, idw, c)
!
! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
!
DOUBLE PRECISION, INTENT(in) :: r, theta
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = r
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.0d0/r
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_mg2d_cyl.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', n(1))
CALL attach(fid, '/', 'NY', n(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MODEM', modem)
CALL attach(fid, '/', 'MODEP', modep)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'NU1', nu1)
CALL attach(fid, '/', 'NU2', nu2)
CALL attach(fid, '/', 'NU0', nu0)
CALL attach(fid, '/', 'MU', mu)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/anal', sol_anal_grid)
CALL putarr(fid, '/solutions/calc', sol_calc_grid)
CALL putarr(fid, '/solutions/direct', sol_direct_grid)
!
CALL creatg(fid, '/Iterations')
CALL putarr(fid, '/Iterations/residues', resid(0:nits))
CALL putarr(fid, '/Iterations/rresidues', rresid(0:nits))
CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM
diff --git a/multigrid/src/test_mgp.f90 b/multigrid/src/test_mgp.f90
index 26f3f24..c09190d 100644
--- a/multigrid/src/test_mgp.f90
+++ b/multigrid/src/test_mgp.f90
@@ -1,242 +1,242 @@
!>
!> @file test_mgp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test multigrid V-cycle for periodic problems
!
USE multigrid
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1, ngauss=2, nits=40
INTEGER :: levels=2, nu1=1, nu2=1, mu=1, nu0=1
CHARACTER(len=4) :: relax='jac '
LOGICAL :: nlfixed = .FALSE.
DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
DOUBLE PRECISION :: omega=2.0d0/3.0d0
INTEGER :: l, nrank, dim, its
DOUBLE PRECISION :: errdisc_dir
DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:), sol_grid(:)
DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:)
DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:)
!
TYPE(grid1d), ALLOCATABLE :: gridx(:)
TYPE(mg_info) :: info
!
NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, &
& relax, nits, nlfixed, levels, nu1, nu2, mu, nu0
!--------------------------------------------------------------------------------
! 1. Prologue
! Inputs
!
READ(*,newrun)
WRITE(*,newrun)
!
levels = MIN(levels, get_lmax(nx))
!
info%nu1 = nu1
info%nu2 = nu2
info%mu = mu
info%nu0 = nu0
info%levels = levels
info%relax = relax
info%omega = omega
!
! Create grids
!
ALLOCATE(gridx(levels))
CALL create_grid(nx, nidbas, ngauss, 0, gridx, period=.TRUE.)
WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
!
! Create FE matrice and set BC u(0)=u(1)=0
!
DO l=1,levels
CALL femat(gridx(l)%spl, gridx(l)%matap, coefeq)
END DO
!
! Construct RHS only on the finest grid
!
nrank = gridx(1)%rank ! Rank of the system (number of unknowns)
dim = nrank+nidbas ! Dimension of Splines space
CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
!--------------------------------------------------------------------------------
! 2. Direct solution
!
WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
ALLOCATE(u_direct(0:nx))
ALLOCATE(sol_direct(nrank))
ALLOCATE(sol_grid(dim)) ! Required by GRIDVAL
!
CALL direct_solve(gridx(1), sol_direct)
sol_grid(1:nrank) = sol_direct(1:nrank)
sol_grid(nrank+1:dim) = sol_direct(1:nidbas)
CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_grid)
!
errdisc_dir = disc_err(gridx(1)%spl, sol_grid, sol)
WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
!--------------------------------------------------------------------------------
! 3. Solution from MG V-cycles
!
WRITE(*,'(//a)') 'Multigrid MG V-cycles'
ALLOCATE(sol_calc(nrank))
ALLOCATE(err(0:nits))
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
!
! Initial guess
!
sol_calc(:) = 0.0d0
sol_grid(:) = 0.0d0
IF(nlfixed) THEN
sol_calc(:) = sol_direct(:)
sol_grid(1:nrank) = sol_calc(1:nrank)
sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
END IF
gridx(1)%v(:) = sol_calc(:)
err(0) = normf(gridx(1)%matmp, sol_calc-sol_direct)
errdisc(0) = disc_err(gridx(1)%spl, sol_grid, sol)
resid(0) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
!
! Iterations
!
DO its=1,nits
!
CALL mg(gridx, info, 1)
sol_calc(:) = gridx(1)%v(:)
sol_grid(1:nrank) = sol_calc(1:nrank)
sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
!
err(its) = normf(gridx(1)%matmp, sol_calc-sol_direct)
errdisc(its) = disc_err(gridx(1)%spl, sol_grid, sol) ! will call GRIDVAL
resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
END DO
!
WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', &
& 'disc. err', 'ratio'
WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0)
WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), &
& resid(its), resid(its)/resid(its-1), &
& errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits)
!--------------------------------------------------------------------------------
! 4. Solution from FMG
!
WRITE(*,'(//a)') 'Full Multigrid'
ALLOCATE(errdisc_fmg(nits))
DO its=1,nits
info%nu0 = its
!
CALL fmg(gridx, info, 1)
sol_calc(:) = gridx(1)%v(:)
sol_grid(1:nrank) = sol_calc(1:nrank)
sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
!
errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_grid, sol) ! will call GRIDVAL
resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
END DO
WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio'
WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), &
& errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits)
!
! Grid values at final iteration
!
ALLOCATE(u_exact(0:nx))
ALLOCATE(u_calc(0:nx))
u_exact = sol(gridx(1)%x)
CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_grid)
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
CALL h5file
!--------------------------------------------------------------------------------
CONTAINS
FUNCTION rhs(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: rhs
rhs = SIN(pi*kmode*x)
END FUNCTION rhs
FUNCTION sol(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sol(SIZE(x))
sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
END FUNCTION sol
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
c(1) = 1.0d0
idt(1) = 1
idw(1) = 1
c(2) = sigma
idt(2) = 0
idw(2) = 0
END SUBROUTINE coefeq
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_mgp.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'KMODE', kmode)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'NLFIXED', nlfixed)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'NU1', nu1)
CALL attach(fid, '/', 'NU2', nu2)
CALL attach(fid, '/', 'NU0', nu0)
CALL attach(fid, '/', 'MU', mu)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putarr(fid, TRIM(dsname)//'/mata', gridx(l)%matap%val)
IF(l.GT.1) THEN
CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
END IF
CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
END DO
CALL creatg(fid, '/Iterations')
CALL putarr(fid, '/Iterations/errors', err)
CALL putarr(fid, '/Iterations/residues', resid)
CALL putarr(fid, '/Iterations/disc_errors', errdisc)
CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg)
CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x)
CALL putarr(fid, '/Iterations/u_direct', u_direct)
CALL putarr(fid, '/Iterations/u_exact', u_exact)
CALL putarr(fid, '/Iterations/u_calc', u_calc)
CALL closef(fid)
END SUBROUTINE h5file
END PROGRAM main
diff --git a/multigrid/src/test_relax.f90 b/multigrid/src/test_relax.f90
index 9d00c66..c9de563 100644
--- a/multigrid/src/test_relax.f90
+++ b/multigrid/src/test_relax.f90
@@ -1,227 +1,227 @@
!>
!> @file test_relax.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test different relaxations
!
USE multigrid
USE math_util, ONLY : root_bessj
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1, alpha=0, nits=40
INTEGER :: modem=22, modep=10
CHARACTER(len=4) :: relax='jac '
LOGICAL :: nlfixed = .FALSE.
DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
DOUBLE PRECISION :: omega=2.0d0/3.0d0
INTEGER :: ngauss, i, nrank, its
DOUBLE PRECISION, ALLOCATABLE :: u_exact(:), u_direct(:), u_calc(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_calc(:), err(:), resid(:), errdisc(:)
DOUBLE PRECISION :: errdisc_dir
!
TYPE(grid1d) :: gridx(1)
!
NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, &
& alpha, relax, omega, nits, nlfixed
!--------------------------------------------------------------------------------
! 1. Prologue: read input, construct matrix and RHS
READ(*,newrun)
WRITE(*,newrun)
!
! Set grid
!
CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
!
! Create FE matrice and set BC u(0)=u(1)=0
!
CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq)
nrank = gridx(1)%rank
!
! Left Dirichlet BC (only for Cartesian geometry)
IF(alpha .EQ. 0) THEN
CALL ibcmat(1, gridx(1)%mata)
END IF
!
! Right Dirichlet BC
CALL ibcmat(nrank, gridx(1)%mata)
!
! Construct RHS
!
CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
!
! Left Dirichlet BC (only for Cartesian geometry)
IF(alpha .EQ. 0) THEN
gridx(1)%f(1) = 0.0d0
END IF
!
! Right Dirichlet BC
gridx(1)%f(nrank) = 0.0d0
!--------------------------------------------------------------------------------
! 2. Direct solution
!
! Direct solutions
!
ALLOCATE(sol_direct(nrank))
CALL direct_solve(gridx(1), sol_direct)
!
! Grid values
!
ALLOCATE(u_exact(0:nx))
ALLOCATE(u_direct(0:nx))
ALLOCATE(u_calc(0:nx))
!
u_exact = sol(gridx(1)%x)
CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct)
errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol)
WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
!--------------------------------------------------------------------------------
! 3. Relaxation
!
ALLOCATE(sol_calc(nrank))
ALLOCATE(err(0:nits))
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
!
! Initial guess
sol_calc(:) = 0.0d0
IF(nlfixed) THEN
sol_calc(:) = sol_direct(:)
END IF
err(0) = normf(gridx(1)%matm, sol_calc-sol_direct)
errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol)
resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
!
! Iterations
DO its=1,nits
SELECT CASE (TRIM(relax))
CASE('jac')
CALL jacobi(gridx(1)%mata, omega, 1, sol_calc, gridx(1)%f)
CASE('gs')
CALL gs(gridx(1)%mata, 1, sol_calc, gridx(1)%f)
END SELECT
err(its) = normf(gridx(1)%matm, sol_calc-sol_direct)
errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol)
resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
END DO
CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc)
!
WRITE(*,'(/a4,3a12)') 'its', 'error', 'residue', 'disc. err'
WRITE(*,'(i4,3(1pe12.3))') 0, err(0), resid(0), errdisc(0)
WRITE(*,'((i4,6(1pe12.3)))') (its, err(its), resid(its), errdisc(its), &
& err(its)/err(its-1), resid(its)/resid(its-1), &
& errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
!
CALL h5file
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION rhs(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: nump
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
rhs = SIN(pi*kmode*x)
CASE(1) ! Cylindrical
nump = root_bessj(modem, modep)
rhs = x * nump**2 * bessel_jn(modem, nump*x)
END SELECT
END FUNCTION rhs
!+++
FUNCTION sol(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sol(SIZE(x))
DOUBLE PRECISION :: nump
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
CASE(1) ! Cylindrical
nump = root_bessj(modem, modep)
sol(:) = bessel_jn(modem, nump*x(:))
END SELECT
END FUNCTION sol
!+++
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
c(1) = 1.0d0
idt(1) = 1
idw(1) = 1
c(2) = sigma
idt(2) = 0
idw(2) = 0
CASE(1) ! Cylindrical
c(1) = x
idt(1) = 1
idw(1) = 1
c(2) = REAL(modem,8)**2/x
idt(2) = 0
idw(2) = 0
END SELECT
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_relax.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'KX', kmode)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'ALPHA', alpha)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'OMEGA', omega)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'MODEM', modem)
CALL attach(fid, '/', 'MODEP', modep)
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', gridx(1)%x)
CALL putarr(fid, '/solutions/direct', u_direct)
CALL putarr(fid, '/solutions/anal', u_exact)
CALL putarr(fid, '/solutions/calc', u_calc)
!
CALL creatg(fid, '/relaxation')
CALL putarr(fid, '/relaxation/errdisc', errdisc)
CALL putarr(fid, '/relaxation/resid', resid)
!
! Store FE matrix
!
CALL putmat(fid, '/MATA', gridx(1)%mata)
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM main
diff --git a/multigrid/src/test_relax2d.f90 b/multigrid/src/test_relax2d.f90
index 09583c5..932dfd5 100644
--- a/multigrid/src/test_relax2d.f90
+++ b/multigrid/src/test_relax2d.f90
@@ -1,334 +1,334 @@
!>
!> @file test_relax2d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test 2d direcxt solve and relaxation methods
!
USE multigrid
USE csr
IMPLICIT NONE
INCLUDE 'mpif.h'
!
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
INTEGER :: levels=1, nits=1000
CHARACTER(len=4) :: relax='jac '
DOUBLE PRECISION :: omega=2.0d0/3.0d0
DOUBLE PRECISION :: t0
!
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:)
DOUBLE PRECISION, POINTER :: sol_direct_1d(:), sol_relax_1d(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:)
!
INTEGER :: ierr, me
INTEGER :: l, nterms
INTEGER :: its
!
TYPE(grid2d), ALLOCATABLE :: grids(:)
!
NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, nits, relax
!--------------------------------------------------------------------------------
! 1. Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Inputs
!
n = (/8, 8/)
nidbas=(/3,3/)
ngauss=(/2,2/)
alpha = (/0,0/)
kx=4
ky=3
sigma=10.0d0
levels=2
relax='jac'
nits=100
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
!
! Adjust number of levels
!
levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
!
! Create grids
!
dx = 1.0d0/REAL(n(1),8)
dy = 1.0d0/REAL(n(2),8)
ALLOCATE(x(0:n(1)), y(0:n(2)))
DO ix=0,n(1)
x(ix) = ix*dx
END DO
DO iy=0,n(2)
y(iy) = iy*dy
END DO
!
ALLOCATE(grids(levels))
CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
!
! Construct RHS and set BC only on the finest grid
!
CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
CALL ibcrhs(grids(1), grids(1)%f)
!!$ CALL printmat('** RHS **', grids(1)%f)
!
! Build FE matrices and set BC
!
nterms = 3
DO l=1,levels
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
CALL ibcmat(grids(l), grids(l)%mata)
CALL to_mat(grids(l)%mata)
END DO
!--------------------------------------------------------------------------------
! 1. Direct solution (at the finest grid, l=1)
!
WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
& source=grids(1)%f)
sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
!
t0 = mpi_wtime()
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0
!
sol_direct = grids(1)%f
t0 = mpi_wtime()
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0
!
ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
& [0,0], sol_direct)
sol_anal_grid = sol(grids(1)%x, grids(1)%y)
WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
& disc_err(grids(1)%spl, sol_direct, sol), &
& residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
!--------------------------------------------------------------------------------
! 2. Relaxation (at the finest grid, l=1)
!
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax
!
sol_relax_1d = 0.0d0
errdisc(0) = disc_err(grids(1)%spl, sol_relax, sol)
resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
t0 = mpi_wtime()
DO its=1,nits
SELECT CASE (TRIM(relax))
CASE('jac')
CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d)
CASE('gs')
CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d)
END SELECT
errdisc(its) = disc_err(grids(1)%spl, sol_relax, sol)
resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
END DO
WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8)
!
WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err'
WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0)
WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), &
& resid(its)/resid(its-1), &
& errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
CALL mpi_finalize(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION rhs(x, y)
!
! Return problem RHS
!
DOUBLE PRECISION, INTENT(in) :: x, y
DOUBLE PRECISION :: rhs
rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
END FUNCTION rhs
!+++
FUNCTION sol(x, y)
!
! Return exact problem solution
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c
INTEGER :: j
DO j=1,SIZE(y)
c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
sol(:,j) = c * SIN(PI*kx*x(:))
END DO
END FUNCTION sol
!+++
SUBROUTINE coefeq(x, y, idt, idw, c)
!
! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
!
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = 1.0d0
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.0d0
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
!
c(3) = sigma
idt(3,1) = 0
idt(3,2) = 0
idw(3,1) = 0
idw(3,2) = 0
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_relax2d.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', n(1))
CALL attach(fid, '/', 'NY', n(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'KX', kx)
CALL attach(fid, '/', 'KY', ky)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'ALPHA1', alpha(1))
CALL attach(fid, '/', 'ALPHA2', alpha(2))
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'NITS', nits)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/direct', sol_direct_grid)
CALL putarr(fid, '/solutions/anal', sol_anal_grid)
!
CALL creatg(fid, '/relaxation')
CALL putarr(fid, '/relaxation/errdisc', errdisc)
CALL putarr(fid, '/relaxation/resid', resid)
!
IF(ALLOCATED(grids(1)%mata%mumps)) THEN
CALL myputmat(fid, '/MUMPS', grids(1)%mata%mumps)
END IF
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
SUBROUTINE myputmat(fid, label, mat, str)
!
! Write matrix to hdf5 file
!
USE futils
!
INTEGER, INTENT(in) :: fid
CHARACTER(len=*), INTENT(in) :: label
TYPE(mumps_mat) :: mat
CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
CHARACTER(len=128) :: mumps_grp
!
IF(PRESENT(str)) THEN
CALL creatg(fid, label, str)
ELSE
CALL creatg(fid, label)
END IF
CALL attach(fid, label, 'RANK', mat%rank)
CALL attach(fid, label, 'NNZ', mat%nnz)
CALL attach(fid, label, 'NLSYM', mat%nlsym)
CALL attach(fid, label, 'NLPOS', mat%nlpos)
CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
CALL putarr(fid, TRIM(label)//'/cols', mat%mumps_par%JCN_loc)
CALL putarr(fid, TRIM(label)//'/val', mat%mumps_par%A_loc)
!
mumps_grp = TRIM(label)//'/mumps_par'
CALL creatg(fid, mumps_grp)
CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN_loc)
!
END SUBROUTINE myputmat
END PROGRAM
diff --git a/multigrid/src/test_relax2d_cyl.f90 b/multigrid/src/test_relax2d_cyl.f90
index c02b199..d26d308 100644
--- a/multigrid/src/test_relax2d_cyl.f90
+++ b/multigrid/src/test_relax2d_cyl.f90
@@ -1,369 +1,369 @@
!>
!> @file test_relax2d_cyl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test 2d direcxt solve and relaxation methods
! Cylindrical case
!
USE multigrid
USE csr
IMPLICIT NONE
INCLUDE 'mpif.h'
!
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
INTEGER, DIMENSION(2) :: n, nidbas, ngauss
INTEGER :: modem=22, modep=10
INTEGER :: levels=1, nits=1000
CHARACTER(len=4) :: relax='jac ', prb='poly'
DOUBLE PRECISION :: omega=2.0d0/3.0d0
LOGICAL :: nluniq=.TRUE.
LOGICAL :: nlfixed=.FALSE.
DOUBLE PRECISION :: t0
DOUBLE PRECISION :: resid_direct, errdisc_direct
!
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:)
DOUBLE PRECISION, POINTER :: sol_direct_1d(:), sol_relax_1d(:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:), sol_relax_orig(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_relax_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:)
!
INTEGER :: ierr, me
INTEGER :: l, nterms, j
INTEGER :: its
!
TYPE(grid2d), ALLOCATABLE :: grids(:)
!
NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, omega, nits, &
& relax, prb, nlfixed, nluniq
!--------------------------------------------------------------------------------
! 1. Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Inputs
!
n = (/8, 8/)
nidbas=(/3,3/)
ngauss=(/2,2/)
modem = 22
modep = 10
levels=2
relax='jac'
prb='poly'
nits=100
nluniq = .TRUE.
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
!
! Adjust number of levels
!
levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
!
! Create grids
!
dx = 1.0d0/REAL(n(1),8)
dy = 2.0d0*pi/REAL(n(2),8)
ALLOCATE(x(0:n(1)), y(0:n(2)))
DO ix=0,n(1)
x(ix) = ix*dx
END DO
DO iy=0,n(2)
y(iy) = iy*dy
END DO
!
ALLOCATE(grids(levels))
CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
& debug_in=.FALSE.)
WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
!
! Construct RHS and set BC only on the finest grid
!
CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq)
!!$ CALL printmat('** RHS **', grids(1)%f)
!
! Build FE matrices and set BC
!
nterms = 2
DO l=1,levels
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
CALL to_mat(grids(l)%mata)
END DO
!--------------------------------------------------------------------------------
! 1. Direct solution (at the finest grid, l=1)
!
WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
!
ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
& source=grids(1)%f)
sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
!
ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
!
PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
!
t0 = mpi_wtime()
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0
!
sol_direct = grids(1)%f
t0 = mpi_wtime()
CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
!
sol_direct_orig = sol_direct
CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq)
errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol)
!
WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0
!
ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
!
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
& [0,0], sol_direct_orig)
!
sol_anal_grid = sol(grids(1)%x, grids(1)%y)
WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
& errdisc_direct, resid_direct
WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
& NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
!--------------------------------------------------------------------------------
! 2. Relaxation (at the finest grid, l=1)
!
ALLOCATE(errdisc(0:nits))
ALLOCATE(resid(0:nits))
ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
ALLOCATE(sol_relax_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax
!
! Initial guess
!
IF(nlfixed) THEN
sol_relax = sol_direct ! Test fixed point\
ELSE
sol_relax = 0.0d0
END IF
!
sol_relax_orig = sol_relax
CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq)
errdisc(0) = disc_err(grids(1)%spl, sol_relax_orig, sol)
resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
!
t0 = mpi_wtime()
DO its=1,nits
SELECT CASE (TRIM(relax))
CASE('jac')
CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d)
CASE('gs')
CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d)
END SELECT
resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
!
sol_relax_orig = sol_relax
CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq)
!
errdisc(its) = disc_err(grids(1)%spl, sol_relax_orig, sol)
END DO
WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8)
!
ALLOCATE(sol_relax_grid(0:n(1),0:n(2)))
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_relax_grid, &
& [0,0], sol_relax_orig)
!
WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err'
WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0)
WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), &
& resid(its)/resid(its-1), &
& errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
CALL mpi_finalize(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION norm2(x)
!
! Compute the 2-norm of array x
!
IMPLICIT NONE
DOUBLE PRECISION :: norm2
DOUBLE PRECISION, INTENT(in) :: x(:,:)
DOUBLE PRECISION :: sum2
INTEGER :: i, j
!
sum2 = 0.0d0
DO i=1,SIZE(x,1)
DO j=1,SIZE(x,2)
sum2 = sum2 + x(i,j)**2
END DO
END DO
norm2 = SQRT(sum2)
END FUNCTION norm2
!+++
FUNCTION rhs(r, theta)
!
! Return problem RHS
!
USE math_util, ONLY : root_bessj
DOUBLE PRECISION, INTENT(in) :: r, theta
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: nump
!
SELECT CASE(TRIM(prb))
CASE('poly')
rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
CASE('bess')
nump = root_bessj(modem, modep)
rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
END SELECT
END FUNCTION rhs
!+++
FUNCTION sol(r, theta)
!
! Return exact problem solution
!
USE math_util, ONLY : root_bessj
DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
DOUBLE PRECISION :: nump
INTEGER :: j
!
SELECT CASE(TRIM(prb))
CASE('poly')
DO j=1,SIZE(theta)
sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
END DO
CASE('bess')
nump = root_bessj(modem, modep)
DO j=1,SIZE(theta)
sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
END DO
END SELECT
END FUNCTION sol
!+++
SUBROUTINE coefeq(r, theta, idt, idw, c)
!
! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
!
DOUBLE PRECISION, INTENT(in) :: r, theta
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = r
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.0d0/r
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_relax2d_cyl.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', n(1))
CALL attach(fid, '/', 'NY', n(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MODEM', modem)
CALL attach(fid, '/', 'MODEP', modep)
CALL attach(fid, '/', 'LEVELS', levels)
CALL attach(fid, '/', 'RELAX', relax)
CALL attach(fid, '/', 'NITS', nits)
CALL attach(fid, '/', 'OMEGA', omega)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/direct', sol_direct_grid)
CALL putarr(fid, '/solutions/relax', sol_relax_grid)
CALL putarr(fid, '/solutions/anal', sol_anal_grid)
!
CALL creatg(fid, '/relaxation')
CALL putarr(fid, '/relaxation/errdisc', errdisc)
CALL putarr(fid, '/relaxation/resid', resid)
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM
diff --git a/multigrid/src/test_stencil.f90 b/multigrid/src/test_stencil.f90
index e1a0dc0..934af01 100644
--- a/multigrid/src/test_stencil.f90
+++ b/multigrid/src/test_stencil.f90
@@ -1,238 +1,238 @@
!>
!> @file test_stencil.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE mod
USE iso_fortran_env, ONLY : real64
IMPLICIT NONE
!
INTEGER, PARAMETER :: rkind = real64
LOGICAL, PARAMETER :: nldebug=.FALSE.
REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
CONTAINS
END MODULE mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PROGRAM main
USE mpi
USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2
USE stencil, ONLY : stencil_2d, init, laplacian, vmx, putmat
USE mod
IMPLICIT NONE
!
INTEGER, PARAMETER :: ndims=2
!
INTEGER :: me, neighs(4), npes, ierr
INTEGER, DIMENSION(ndims) :: dims=[0,0]
INTEGER, DIMENSION(ndims) :: coords, comm1d
LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
LOGICAL :: reorder =.FALSE.
INTEGER :: comm_cart
!
INTEGER :: nx=4, ny=4 ! Number of intervals
INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc
!
REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:)
REAL(rkind), ALLOCATABLE :: arr(:,:), fexact(:,:)
REAL(rkind), ALLOCATABLE :: barr1(:,:), barr2(:,:), barr3(:,:)
REAL(rkind) :: dx, dy
REAL(rkind) :: err
INTEGER, DIMENSION(5,2) :: id ! 5-point stencil
INTEGER :: npoints
TYPE(stencil_2d) :: mat
INTEGER :: i, j
!
NAMELIST /in/ nx, ny
!================================================================================
! 1.0 Prologue
!
! 2D process grid
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_dims_create(npes, ndims, dims, ierr)
CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
& ierr)
!
CALL mpi_comm_rank(comm_cart, me, ierr)
CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
!
CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
!
! Read problem inputs
IF(me.EQ.0) THEN
READ(*,in)
WRITE(*,in)
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
!================================================================================
! 2.0 2d Grid construction
!
! Partition 2D grid
npt_glob(1) = nx+1
npt_glob(2) = ny+1
DO i=1,ndims
CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
e(i) = s(i) + npt_loc(i) - 1
END DO
WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
& (coords(i),s(i),e(i),i=1,ndims)
!
! Global mesh
dx = 1.0d0/REAL(nx)
dy = 1.0d0/REAL(ny)
ALLOCATE(xgrid(0:nx))
ALLOCATE(ygrid(0:ny))
xgrid = [ (i*dx, i=0,nx) ]
ygrid = [ (i*dy, i=0,ny) ]
!================================================================================
! 3.0 FD Laplacian
!
id=RESHAPE([ 0, -1, 0, 1, 0, &
0, 0,-1, 0, 1], &
[5,2])
npoints = 5
CALL init(s, e, id, .FALSE., mat, comm_cart)
!
CALL laplacian(dx, dy, mat)
!================================================================================
! 4.0 Check matrice-vector product
!
! Local arrays with ghost cells
lb = mat%s-1
ub = mat%e+1
ALLOCATE(arr(lb(1):ub(1),lb(2):ub(2)))
ALLOCATE(fexact(lb(1):ub(1),lb(2):ub(2)))
ALLOCATE(barr1(lb(1):ub(1),lb(2):ub(2)))
ALLOCATE(barr2(lb(1):ub(1),lb(2):ub(2)))
ALLOCATE(barr3(lb(1):ub(1),lb(2):ub(2)))
!
! Constant vector => Laplacian = 0
barr1 = 0
arr = 1.0
barr1 = vmx(mat,arr)
IF(mat%s(1).EQ.0) barr1(0,:) = 0.0 ! discard boundary values
IF(mat%e(1).EQ.nx) barr1(nx,:) = 0.0
IF(mat%s(2).EQ.0) barr1(:,0) = 0.0
IF(mat%e(2).EQ.ny) barr1(:,ny) = 0.0
err = norm2_vec(barr1,comm_cart,root=0,garea=[1,1])
IF(me.EQ.0) THEN
WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err
END IF
!
! Bilinear vector => Laplacian = 0
arr =0.0d0
barr2=0.0d0
DO j=mat%s(2),mat%e(2)
DO i=mat%s(1),mat%e(1)
arr(i,j) = xgrid(i)*ygrid(j)
END DO
END DO
CALL exchange(comm_cart, arr)
barr2 = vmx(mat,arr)
IF(mat%s(1).EQ.0) barr2(0,:) = 0.0 ! discard boundary values
IF(mat%e(1).EQ.nx) barr2(nx,:) = 0.0
IF(mat%s(2).EQ.0) barr2(:,0) = 0.0
IF(mat%e(2).EQ.ny) barr2(:,ny) = 0.0
err = norm2_vec(barr2, comm_cart,root=0,garea=[1,1])
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err
END IF
!
! Biquadratic vector => Laplacian = fexact
DO j=mat%s(2),mat%e(2)
DO i=mat%s(1),mat%e(1)
arr(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0
fexact(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0
END DO
END DO
CALL exchange(comm_cart, arr)
CALL exchange(comm_cart, fexact)
barr3 = vmx(mat,arr) - fexact
IF(mat%s(1).EQ.0) barr3(0,:) = 0.0 ! discard boundary values
IF(mat%e(1).EQ.nx) barr3(nx,:) = 0.0
IF(mat%s(2).EQ.0) barr3(:,0) = 0.0
IF(mat%e(2).EQ.ny) barr3(:,ny) = 0.0
err = norm2_vec(barr3,comm_cart,root=0,garea=[1,1])
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err
END IF
!================================================================================
! 9.0 Epilogue
CALL h5file
CALL MPI_FINALIZE(ierr)
CONTAINS
SUBROUTINE disp(str, arr)
CHARACTER(len=*), INTENT(in) :: str
REAL(rkind), INTENT(in) :: arr(:,:)
INTEGER :: j
WRITE(*,'(/a)') str
DO j=1,SIZE(arr,2)
WRITE(*,'(10f8.3)') arr(:,j)
END DO
END SUBROUTINE disp
!
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_stencil.h5'
INTEGER :: fid
CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO
CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO
CALL putarrnd(fid, '/barr1', barr1,(/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/barr2', barr2,(/1,2/), garea=(/1,1/))
CALL putarrnd(fid, '/barr3', barr3,(/1,2/), garea=(/1,1/))
CALL putmat(fid, '/MAT', mat)
CALL closef(fid)
END SUBROUTINE h5file
!
FUNCTION outerprod(x, y) RESULT(r)
!
! outer product
!
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: r(SIZE(x),SIZE(y))
INTEGER :: i, j
DO j=1,SIZE(y)
DO i=1,SIZE(x)
r(i,j) = x(i)*y(j)
END DO
END DO
END FUNCTION outerprod
!
FUNCTION rhs(x,y)
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: rhs(SIZE(x),SIZE(y))
rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
END FUNCTION rhs
!
FUNCTION exact(x,y)
REAL(rkind), INTENT(in) :: x(:), y(:)
REAL(rkind) :: exact(SIZE(x),SIZE(y))
exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
END FUNCTION exact
END PROGRAM main
diff --git a/multigrid/src/test_stencilg.f90 b/multigrid/src/test_stencilg.f90
index 0b920cc..0696dd2 100644
--- a/multigrid/src/test_stencilg.f90
+++ b/multigrid/src/test_stencilg.f90
@@ -1,203 +1,203 @@
!>
!> @file test_stencilg.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
MODULE mod
USE iso_fortran_env, ONLY : rkind => real64
IMPLICIT NONE
!
LOGICAL, PARAMETER :: nldebug=.FALSE.
REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
CONTAINS
END MODULE mod
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PROGRAM main
USE mpi
USE pputils2, ONLY : dist1d
USE gvector, ONLY : gvector_2d, ASSIGNMENT(=), OPERATOR(-)
USE parmg, ONLY : exchange, norm_vec
USE stencil, ONLY : stencil_2d, init, laplacian, putmat, OPERATOR(*)
USE mod
IMPLICIT NONE
!
INTEGER, PARAMETER :: ndims=2
!
INTEGER :: me, neighs(4), npes, ierr
INTEGER, DIMENSION(ndims) :: dims=[0,0]
INTEGER, DIMENSION(ndims) :: coords, comm1d
LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
LOGICAL :: reorder =.FALSE.
INTEGER :: comm_cart
!
INTEGER :: nx=4, ny=4 ! Number of intervals
INTEGER, DIMENSION(ndims) :: e, s, g, npt_glob, npt_loc
!
REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:)
TYPE(gvector_2d) :: arr, fexact
TYPE(gvector_2d) :: barr1, barr2, barr3
REAL(rkind) :: dx, dy
REAL(rkind) :: err
INTEGER, DIMENSION(5,2) :: id ! 5-point stencil
INTEGER :: npoints
TYPE(stencil_2d) :: mat
INTEGER :: i, j
!
NAMELIST /in/ nx, ny
!================================================================================
! 1.0 Prologue
!
! 2D process grid
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
CALL mpi_dims_create(npes, ndims, dims, ierr)
CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
& ierr)
!
CALL mpi_comm_rank(comm_cart, me, ierr)
CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
!
CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
!
! Read problem inputs
IF(me.EQ.0) THEN
READ(*,in)
WRITE(*,in)
END IF
CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
!================================================================================
! 2.0 2d Grid construction
!
! Partition 2D grid
npt_glob(1) = nx+1
npt_glob(2) = ny+1
DO i=1,ndims
CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
e(i) = s(i) + npt_loc(i) - 1
END DO
WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
& (coords(i),s(i),e(i),i=1,ndims)
!
! Global mesh
dx = 1.0d0/REAL(nx)
dy = 1.0d0/REAL(ny)
ALLOCATE(xgrid(0:nx))
ALLOCATE(ygrid(0:ny))
xgrid = [ (i*dx, i=0,nx) ]
ygrid = [ (i*dy, i=0,ny) ]
!================================================================================
! 3.0 FD Laplacian
!
id=RESHAPE([ 0, -1, 0, 1, 0, &
0, 0,-1, 0, 1], &
[5,2])
npoints = 5
CALL init(s, e, id, .FALSE., mat, comm_cart)
!
CALL laplacian(dx, dy, mat)
!================================================================================
! 4.0 Check matrice-vector product
!
! Local arrays with ghost cells
g = [1,1]
arr = gvector_2d(s, e, g)
barr1 = gvector_2d(s, e, g)
barr2 = gvector_2d(s, e, g)
barr3 = gvector_2d(s, e, g)
fexact = gvector_2d(s, e, g)
!
! Constant vector => Laplacian = 0
arr = 1.0d0
CALL exchange(comm_cart, arr)
barr1 = mat*arr
IF(s(1).EQ.0) barr1%val(0,:) = 0.0 ! discard boundary values
IF(e(1).EQ.nx) barr1%val(nx,:) = 0.0
IF(s(2).EQ.0) barr1%val(:,0) = 0.0
IF(e(2).EQ.ny) barr1%val(:,ny) = 0.0
err = norm_vec(barr1, comm_cart, root=0)
IF(me.EQ.0) THEN
WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err
END IF
!
! Bilinear vector => Laplacian = 0
DO j=s(2),e(2)
DO i=s(1),e(1)
arr%val(i,j) = xgrid(i)*ygrid(j)
END DO
END DO
CALL exchange(comm_cart, arr)
barr2 = mat*arr
IF(s(1).EQ.0) barr2%val(0,:) = 0.0 ! discard boundary values
IF(e(1).EQ.nx) barr2%val(nx,:) = 0.0
IF(s(2).EQ.0) barr2%val(:,0) = 0.0
IF(e(2).EQ.ny) barr2%val(:,ny) = 0.0
err = norm_vec(barr2, comm_cart, root=0)
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err
END IF
!
! Biquadratic vector => Laplacian = fexact
DO j=s(2),e(2)
DO i=s(1),e(1)
arr%val(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0
fexact%val(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0
END DO
END DO
CALL exchange(comm_cart, arr)
CALL exchange(comm_cart, fexact)
barr3 = mat*arr - fexact
IF(s(1).EQ.0) barr3%val(0,:) = 0.0 ! discard boundary values
IF(e(1).EQ.nx) barr3%val(nx,:) = 0.0
IF(s(2).EQ.0) barr3%val(:,0) = 0.0
IF(e(2).EQ.ny) barr3%val(:,ny) = 0.0
err = norm_vec(barr3, comm_cart, root=0)
IF(me.EQ.0) THEN
WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err
END IF
!================================================================================
! 9.0 Epilogue
CALL h5file
CALL MPI_FINALIZE(ierr)
!
CONTAINS
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_stencilg.h5'
INTEGER :: fid
CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO
CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO
CALL putarrnd(fid, '/arr', arr%val,(/1,2/), garea=g)
CALL putarrnd(fid, '/barr1', barr1%val,(/1,2/), garea=g)
CALL putarrnd(fid, '/barr2', barr2%val,(/1,2/), garea=g)
CALL putarrnd(fid, '/barr3', barr3%val,(/1,2/), garea=g)
CALL putmat(fid, '/MAT', mat)
CALL closef(fid)
END SUBROUTINE h5file
!
END PROGRAM main
diff --git a/multigrid/src/test_transf2d.f90 b/multigrid/src/test_transf2d.f90
index a126b7e..82098d7 100644
--- a/multigrid/src/test_transf2d.f90
+++ b/multigrid/src/test_transf2d.f90
@@ -1,301 +1,301 @@
!>
!> @file test_transf2d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test 2d multigrid
!
USE multigrid
USE csr
IMPLICIT NONE
INCLUDE 'mpif.h'
!
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
INTEGER :: levels=1
DOUBLE PRECISION :: omega=2.0d0/3.0d0
!
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy
!
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:)
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:)
DOUBLE PRECISION, POINTER :: fcoarse_1d(:)
DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:)
DOUBLE PRECISION, POINTER :: vfine_1d(:)
DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), &
& disc_err_prolong(:)
!
INTEGER :: ierr, me
INTEGER :: l, nterms
INTEGER :: its
!
TYPE(grid2d), ALLOCATABLE :: grids(:)
!
NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels
!--------------------------------------------------------------------------------
! 1. Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Inputs
!
n = (/8, 8/)
nidbas=(/3,3/)
ngauss=(/2,2/)
alpha = (/0,0/)
kx=4
ky=3
sigma=10.0d0
levels=2
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
!
! Adjust number of levels
!
levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
!
! Create grids
!
dx = 1.0d0/REAL(n(1),8)
dy = 1.0d0/REAL(n(2),8)
ALLOCATE(x(0:n(1)), y(0:n(2)))
DO ix=0,n(1)
x(ix) = ix*dx
END DO
DO iy=0,n(2)
y(iy) = iy*dy
END DO
!
ALLOCATE(grids(levels))
CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
!
! Build FE matrices and set BC
!
nterms = 3
DO l=1,levels
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
CALL ibcmat(grids(l), grids(l)%mata)
CALL to_mat(grids(l)%mata)
END DO
!
! Set BC on grid transfer matrices
!
CALL ibc_transf(grids,1,3)
CALL ibc_transf(grids,2,3)
!--------------------------------------------------------------------------------
! 1. Direct solutions
!
WRITE(*,'(/a)') 'Direct solutions for all levels ...'
ALLOCATE(errdisc(levels))
ALLOCATE(resid(levels))
WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid'
DO l=1,levels
CALL disrhs(grids(l)%spl, grids(l)%f, rhs)
CALL ibcrhs(grids(l), grids(l)%f)
grids(l)%v = grids(l)%f
CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.)
errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol)
resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d)
WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l)
END DO
!
! Grid values of direct solutions at the finest levels
ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
& [0,0], grids(1)%v)
sol_anal_grid = sol(grids(1)%x, grids(1)%y)
!--------------------------------------------------------------------------------
! 2. Test restrict and prolong
!
WRITE(*,'(/a)') 'Testing restrict and prolong...'
WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err'
ALLOCATE(err_restrict(2:levels))
ALLOCATE(err_prolong(2:levels))
ALLOCATE(disc_err_prolong(2:levels))
ALLOCATE(vfine_grid(0:n(1),0:n(2)))
DO l=2,levels
ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2)))
fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse
ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2)))
vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine
!
fcoarse = restrict(grids(l)%matp, grids(l-1)%f)
err_restrict(l) = MAXVAL(ABS(fcoarse_1d-grids(l)%f1d))
!
CALL direct_solve(grids(l), fcoarse_1d)
vfine = prolong(grids(l)%matp, fcoarse)
disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol)
err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d))
!
IF(l.EQ.2) THEN ! Grid val on finest grid
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, &
& [0,0], vfine)
END IF
!
WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), &
& disc_err_prolong(l)
DEALLOCATE(fcoarse)
DEALLOCATE(vfine)
END DO
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
CALL mpi_finalize(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION rhs(x, y)
!
! Return problem RHS
!
DOUBLE PRECISION, INTENT(in) :: x, y
DOUBLE PRECISION :: rhs
rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
END FUNCTION rhs
!+++
FUNCTION sol(x, y)
!
! Return exact problem solution
!
DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
DOUBLE PRECISION :: c
INTEGER :: j
DO j=1,SIZE(y)
c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
sol(:,j) = c * SIN(PI*kx*x(:))
END DO
END FUNCTION sol
!+++
SUBROUTINE coefeq(x, y, idt, idw, c)
!
! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
!
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = 1.0d0
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.0d0
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
!
c(3) = sigma
idt(3,1) = 0
idt(3,2) = 0
idw(3,1) = 0
idw(3,2) = 0
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_transf2d.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', n(1))
CALL attach(fid, '/', 'NY', n(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'KX', kx)
CALL attach(fid, '/', 'KY', ky)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'ALPHA1', alpha(1))
CALL attach(fid, '/', 'ALPHA2', alpha(2))
CALL attach(fid, '/', 'LEVELS', levels)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/direct', sol_direct_grid)
CALL putarr(fid, '/solutions/anal', sol_anal_grid)
CALL putarr(fid, '/solutions/vfine', vfine_grid)
!
! Some errors
!
CALL creatg(fid, '/errors')
CALL putarr(fid, '/errors/errdisc', errdisc)
CALL putarr(fid, '/errors/resid', resid)
CALL putarr(fid, '/errors/restrict', err_restrict)
CALL putarr(fid, '/errors/prolong', err_prolong)
CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong)
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM
diff --git a/multigrid/src/test_transf2d_cyl.f90 b/multigrid/src/test_transf2d_cyl.f90
index 66427e6..1a4ae79 100644
--- a/multigrid/src/test_transf2d_cyl.f90
+++ b/multigrid/src/test_transf2d_cyl.f90
@@ -1,321 +1,321 @@
!>
!> @file test_transf2d_cyl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Test 2d multigrid
! Cylindrical case
!
USE multigrid
USE csr
IMPLICIT NONE
INCLUDE 'mpif.h'
!
DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
INTEGER, DIMENSION(2) :: n, nidbas, ngauss
INTEGER :: modem=22, modep=10
INTEGER :: levels=1
CHARACTER(len=4) :: prb='poly'
LOGICAL :: nluniq=.TRUE.
!
DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
DOUBLE PRECISION :: dx, dy
INTEGER :: ix, iy
!
DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:)
!
DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:)
DOUBLE PRECISION, POINTER :: fcoarse_1d(:)
DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:)
DOUBLE PRECISION, POINTER :: vfine_1d(:)
DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:)
DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), &
& disc_err_prolong(:)
!
INTEGER :: ierr, me
INTEGER :: l, nterms
INTEGER :: its
INTEGER :: n2
!
TYPE(grid2d), ALLOCATABLE :: grids(:)
!
NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, nluniq
!--------------------------------------------------------------------------------
! 1. Prologue
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
!
! Inputs
!
n = (/8, 8/)
nidbas=(/3,3/)
ngauss=(/2,2/)
modem = 22
modep = 10
prb='poly'
levels=2
nluniq = .TRUE.
!
IF(me.EQ.0) THEN
READ(*,newrun)
WRITE(*,newrun)
END IF
CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
!
! Adjust number of levels
!
levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
!
! Create grids
!
dx = 1.0d0/REAL(n(1),8)
dy = 2.0d0*pi/REAL(n(2),8)
ALLOCATE(x(0:n(1)), y(0:n(2)))
DO ix=0,n(1)
x(ix) = ix*dx
END DO
DO iy=0,n(2)
y(iy) = iy*dy
END DO
!
ALLOCATE(grids(levels))
CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
& debug_in=.FALSE.)
WRITE(*,'(5a6,a12)') 'l', 'nx', 'ny', 'rx', 'ry', 'shape of v'
WRITE(*,'(7i6)') (l, grids(l)%n, grids(l)%rank, SHAPE(grids(l)%v), l=1,levels)
!
! Build FE matrices and set BC
!
nterms = 2
DO l=1,levels
CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
CALL to_mat(grids(l)%mata)
END DO
!
! Set BC on grid transfer matrices
!
CALL ibc_transf(grids, 1, 2) ! Only right boundary on r (1st dim.)
!--------------------------------------------------------------------------------
! 1. Direct solutions
!
WRITE(*,'(/a)') 'Direct solutions for all levels ...'
WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid'
!
ALLOCATE(errdisc(levels))
ALLOCATE(resid(levels))
!
DO l=1,levels
CALL disrhs(grids(l)%spl, grids(l)%f, rhs)
CALL ibcrhs(grids(l), grids(l)%f, nluniq_in=nluniq)
!
grids(l)%v = grids(l)%f
CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.)
!
resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d)
CALL back_transf(grids(l), grids(l)%v, nluniq_in=nluniq)
errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol)
WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l)
END DO
!
! Grid values of direct solutions at the finest levels
ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
& [0,0], grids(1)%v)
sol_anal_grid = sol(grids(1)%x, grids(1)%y)
!--------------------------------------------------------------------------------
! 2. Test restrict and prolong
!
WRITE(*,'(/a)') 'Testing restrict and prolong...'
WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err'
ALLOCATE(err_restrict(2:levels))
ALLOCATE(err_prolong(2:levels))
ALLOCATE(disc_err_prolong(2:levels))
ALLOCATE(vfine_grid(0:n(1),0:n(2)))
DO l=2,levels
ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2)))
fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse
ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2)))
vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine
!
fcoarse(:,:) = restrict_cyl(grids(l), grids(l-1)%f, nluniq)
!
err_restrict(l) = MAXVAL(ABS(fcoarse_1d-grids(l)%f1d))
!
CALL direct_solve(grids(l), fcoarse_1d)
!
vfine(:,:) = prolong_cyl(grids(l), fcoarse, nluniq)
!
CALL back_transf(grids(l-1), vfine, nluniq_in=nluniq)
disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol)
err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d))
!
IF(l.EQ.2) THEN ! Grid val on finest grid
CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, &
& [0,0], vfine)
END IF
!
WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), &
& disc_err_prolong(l)
DEALLOCATE(fcoarse)
DEALLOCATE(vfine)
END DO
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
IF(me.EQ.0) CALL h5file
!
CALL mpi_finalize(ierr)
!--------------------------------------------------------------------------------
CONTAINS
!+++
FUNCTION rhs(r, theta)
!
! Return problem RHS
!
USE math_util, ONLY : root_bessj
DOUBLE PRECISION, INTENT(in) :: r, theta
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: nump
!
SELECT CASE(TRIM(prb))
CASE('poly')
rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
CASE('bess')
nump = root_bessj(modem, modep)
rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
END SELECT
END FUNCTION rhs
!+++
FUNCTION sol(r, theta)
!
! Return exact problem solution
!
USE math_util, ONLY : root_bessj
DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
DOUBLE PRECISION :: nump
INTEGER :: j
!
SELECT CASE(TRIM(prb))
CASE('poly')
DO j=1,SIZE(theta)
sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
END DO
CASE('bess')
nump = root_bessj(modem, modep)
DO j=1,SIZE(theta)
sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
END DO
END SELECT
END FUNCTION sol
!+++
SUBROUTINE coefeq(r, theta, idt, idw, c)
!
! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
!
DOUBLE PRECISION, INTENT(in) :: r, theta
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = r
idt(1,1) = 1
idt(1,2) = 0
idw(1,1) = 1
idw(1,2) = 0
!
c(2) = 1.0d0/r
idt(2,1) = 0
idt(2,2) = 1
idw(2,1) = 0
idw(2,2) = 1
END SUBROUTINE coefeq
!+++
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='test_transf2d_cyl.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', n(1))
CALL attach(fid, '/', 'NY', n(2))
CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
CALL attach(fid, '/', 'MODEM', modem)
CALL attach(fid, '/', 'MODEP', modep)
CALL attach(fid, '/', 'LEVELS', levels)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
IF(l.GT.1) THEN
CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
END IF
CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
END DO
!
! Solutions at finest grid
!
CALL creatg(fid, '/solutions')
CALL putarr(fid, '/solutions/xg', grids(1)%x)
CALL putarr(fid, '/solutions/yg', grids(1)%y)
CALL putarr(fid, '/solutions/direct', sol_direct_grid)
CALL putarr(fid, '/solutions/anal', sol_anal_grid)
CALL putarr(fid, '/solutions/vfine', vfine_grid)
!
! Some errors
!
CALL creatg(fid, '/errors')
CALL putarr(fid, '/errors/errdisc', errdisc)
CALL putarr(fid, '/errors/resid', resid)
CALL putarr(fid, '/errors/restrict', err_restrict)
CALL putarr(fid, '/errors/prolong', err_prolong)
CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong)
!
CALL closef(fid)
END SUBROUTINE h5file
!+++
END PROGRAM
diff --git a/multigrid/src/transfer1d.f90 b/multigrid/src/transfer1d.f90
index c245f09..319883d 100644
--- a/multigrid/src/transfer1d.f90
+++ b/multigrid/src/transfer1d.f90
@@ -1,126 +1,126 @@
!>
!> @file transfer1d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
!--------------------------------------------------------------------------------
!--------------------------------------------------------------------------------
!--------------------------------------------------------------------------------
PROGRAM main
USE multigrid
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1, ngauss=4, alpha=0, modem=0
DOUBLE PRECISION :: sigma=1.0d0
LOGICAL :: nlper=.FALSE.
INTEGER :: j
!
TYPE(grid1d) :: gridx(2)
TYPE(gemat) :: prolong_mat, restrict_mat, coarse_mat
DOUBLE PRECISION, ALLOCATABLE :: arow(:), temp(:,:)
!
NAMELIST /newrun/ nx, nidbas, ngauss, sigma, alpha, modem, nlper
!--------------------------------------------------------------------------------
READ(*,newrun)
WRITE(*,newrun)
!
! Set up fine and coarse grids
!
CALL create_grid(nx, nidbas, ngauss, alpha, gridx, period=nlper)
CALL printmat('** Prolongation matrix **', gridx(2)%transf)
!
! Restriction matrix = transpose of prolongation matrix
!
CALL mcopy(gridx(2)%transf, prolong_mat)
CALL init(prolong_mat%mrows, 1, restrict_mat, mrows=prolong_mat%ncols)
restrict_mat%val = TRANSPOSE(prolong_mat%val)
!
! Compute femat on fine and coarse grids
!
IF(nlper) THEN
CALL femat(gridx(1)%spl, gridx(1)%matap, coefeq)
CALL printmat('** FE matrix on fine mesh **', gridx(1)%matap)
CALL femat(gridx(2)%spl, gridx(2)%matap, coefeq)
CALL printmat('** FE matrix on coarse mesh **', gridx(2)%matap)
ELSE
CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq)
CALL printmat('** FE matrix on fine mesh **', gridx(1)%mata)
CALL femat(gridx(2)%spl, gridx(2)%mata, coefeq)
CALL printmat('** FE matrix on coarse mesh **', gridx(2)%mata)
END IF
!
! Compute coarse FE matrix using transfer matrix
!
IF(nlper) THEN
CALL init(gridx(2)%matap%rank, 1, coarse_mat)
ALLOCATE(temp(gridx(1)%matap%rank,gridx(2)%matap%rank))
DO j=1,gridx(2)%matap%rank
temp(:,j) = vmx(gridx(1)%matap,prolong_mat%val(:,j))
END DO
coarse_mat%val = vmx(restrict_mat,temp)
DEALLOCATE(temp)
ELSE
CALL init(gridx(2)%mata%rank, 1, coarse_mat)
coarse_mat%val = vmx(restrict_mat,vmx(gridx(1)%mata,prolong_mat%val))
END IF
CALL printmat('** Coarse FE matrix using transfer operators **', coarse_mat)
!
! Compute the diff of Ac - R*Af*P
!
IF(nlper) THEN
coarse_mat%val = coarse_mat%val - gridx(2)%matap%val
ELSE
ALLOCATE(arow(gridx(2)%mata%rank))
DO j=1,gridx(2)%mata%rank
CALL getcol(gridx(2)%mata, j, arow)
coarse_mat%val(:,j) = coarse_mat%val(:,j)-arow(:)
END DO
DEALLOCATE(arow)
END IF
WRITE(*,'(a,1pe12.3)') 'Diff =', MAXVAL(ABS(coarse_mat%val))
CONTAINS
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
c(1) = 1.0d0
idt(1) = 1
idw(1) = 1
c(2) = sigma
idt(2) = 0
idw(2) = 0
CASE(1)
c(1) = x
idt(1) = 1
idw(1) = 1
c(2) = modem**2/x
idt(2) = 0
idw(2) = 0
CASE default
WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!'
END SELECT
END SUBROUTINE coefeq
!--------------------------------------------------------------------------------
END PROGRAM main
diff --git a/multigrid/src/transfer1d_col.f90 b/multigrid/src/transfer1d_col.f90
index d0724d5..75b9d8c 100644
--- a/multigrid/src/transfer1d_col.f90
+++ b/multigrid/src/transfer1d_col.f90
@@ -1,53 +1,53 @@
!>
!> @file transfer1d_col.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Obtain grid transfer by collocation
!
USE multigrid
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1
LOGICAL :: nlper=.TRUE.
!
TYPE(grid1d) :: gridx(2)
TYPE(gemat) :: pmat
!
NAMELIST /newrun/ nx, nidbas, nlper
!--------------------------------------------------------------------------------
READ(*,newrun)
WRITE(*,newrun)
!
CALL create_grid(nx, nidbas, 1, 0, gridx, period=nlper)
CALL printmat('** Prolongation matrix (using mass matrix) **', gridx(2)%transf)
!
CALL calc_pmat(gridx(1), gridx(2), pmat, .TRUE.)
CALL printmat('** Prolongation matrix (by collocation) **', pmat)
!
WRITE(*,'(/a,1pe12.3)') 'Max diff =', MAXVAL(ABS(pmat%val-gridx(2)%transf%val))
!
END PROGRAM main
diff --git a/multigrid/src/two_grid.f90 b/multigrid/src/two_grid.f90
index db513a8..4342a4d 100644
--- a/multigrid/src/two_grid.f90
+++ b/multigrid/src/two_grid.f90
@@ -1,189 +1,189 @@
!>
!> @file two_grid.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
PROGRAM main
!
! Check some properties of grid transfer
!
USE multigrid
USE math_util, ONLY : root_bessj
IMPLICIT NONE
!
INTEGER :: nx=8, nidbas=1, ngauss=2, alpha=0
INTEGER :: modem=22, modep=10
INTEGER :: levels=2
INTEGER :: l, nrank
DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
DOUBLE PRECISION, ALLOCATABLE :: v_prolong(:)
!
TYPE(grid1d) :: gridx(2)
!
NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha
!--------------------------------------------------------------------------------
! 1. Prologue
! Inputs
!
READ(*,newrun)
WRITE(*,newrun)
!
! Create grids
!
CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
!
! Create FE matrice and set BC u(0)=u(1)=0
!
DO l=1,levels
CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq)
!
! Left Dirichlet BC (only for Cartesian geometry)
IF(alpha .EQ. 0) THEN
CALL ibcmat(1, gridx(l)%mata)
END IF
!
! Right Dirichlet BC
CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata)
!
! BC on grid transfer operator
IF(l.GT.1) THEN
WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0
IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0
gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0
END IF
END DO
!
! Construct RHS and set BC only on the finest grid
!
nrank = gridx(1)%rank
CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
!
! Left Dirichlet BC (only for Cartesian geometry)
IF(alpha .EQ. 0) THEN
gridx(1)%f(1) = 0.0d0
END IF
!
! Right Dirichlet BC
gridx(1)%f(nrank) = 0.0d0
!
! RHS on coarse grid by restriction
!
gridx(2)%f = restrict(gridx(2)%transf,gridx(1)%f)
!--------------------------------------------------------------------------------
! 2. Direct solutions
!
DO l=1,levels
CALL direct_solve(gridx(l), gridx(l)%v)
WRITE(*,'(a,i3/(10(1pe12.3)))') 'Sol at level', l, gridx(l)%v
END DO
!
! Prolongation of coarse solution
!
ALLOCATE(v_prolong(SIZE(gridx(1)%v)))
!
v_prolong = prolong(gridx(2)%transf, gridx(2)%v)
WRITE(*,'(a,i3/(10(1pe12.3)))') 'Prolong. sol.', l, v_prolong
WRITE(*,'(a,1pe12.3)') 'Error ||V_prolong-V_fine||', normf(gridx(1)%matm, v_prolong-gridx(1)%v)
!--------------------------------------------------------------------------------
! 9. Epilogue
!
! Creata HDF5 file
!
CALL h5file
!--------------------------------------------------------------------------------
CONTAINS
SUBROUTINE h5file
USE futils
CHARACTER(len=128) :: file='two_grid.h5'
INTEGER :: fid
INTEGER :: l
CHARACTER(len=64) :: dsname
CALL creatf(file, fid, real_prec='d')
CALL attach(fid, '/', 'NX', nx)
CALL attach(fid, '/', 'NIDBAS', nidbas)
CALL attach(fid, '/', 'SIGMA', sigma)
CALL attach(fid, '/', 'KMODE', kmode)
CALL attach(fid, '/', 'ALPHA', alpha)
CALL attach(fid, '/', 'LEVELS', levels)
CALL creatg(fid, '/mglevels')
DO l=1,levels
WRITE(dsname,'("/mglevels/level.",i2.2)') l
CALL creatg(fid, TRIM(dsname))
CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata)
IF(l.GT.1) THEN
CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
END IF
CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
END DO
CALL putarr(fid, '/v_prolong', v_prolong)
END SUBROUTINE h5file
FUNCTION rhs(x)
DOUBLE PRECISION, INTENT(in) :: x
DOUBLE PRECISION :: rhs
DOUBLE PRECISION :: nump
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
rhs = SIN(pi*kmode*x)
CASE(1) ! Cylindrical
nump = root_bessj(modem, modep)
rhs = x * nump**2 * bessel_jn(modem, nump*x)
END SELECT
END FUNCTION rhs
FUNCTION sol(x)
DOUBLE PRECISION, INTENT(in) :: x(:)
DOUBLE PRECISION :: sol(SIZE(x))
DOUBLE PRECISION :: nump
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
CASE(1) ! Cylindrical
nump = root_bessj(modem, modep)
sol(:) = bessel_jn(modem, nump*x(:))
END SELECT
END FUNCTION sol
SUBROUTINE coefeq(x, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
SELECT CASE (alpha)
CASE(0) ! Cartesian geometry
c(1) = 1.0d0
idt(1) = 1
idw(1) = 1
c(2) = sigma
idt(2) = 0
idw(2) = 0
CASE(1) ! Cylindrical
c(1) = x
idt(1) = 1
idw(1) = 1
c(2) = REAL(modem,8)**2/x
idt(2) = 0
idw(2) = 0
END SELECT
END SUBROUTINE coefeq
END PROGRAM main
diff --git a/multigrid/wk/CMakeLists.txt b/multigrid/wk/CMakeLists.txt
index 98a7017..40db313 100644
--- a/multigrid/wk/CMakeLists.txt
+++ b/multigrid/wk/CMakeLists.txt
@@ -1,53 +1,53 @@
#
# @file CMakeLists.txt
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
project(multigrid_wk)
set(MG_TESTS
transfer1d
test_relax
test_mg
test_mgp
test_csr
two_grid
test_mg2d
test_relax2d
test_transf2d
transfer1d_col
test_relax2d_cyl
test_transf2d_cyl
test_mg2d_cyl
poisson_fd
)
set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh")
set(BIN_DIR "${multigrid_tests_BINARY_DIR}")
set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
foreach(prog ${MG_TESTS})
add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR}
)
endforeach()
diff --git a/multigrid/wk/run.sh b/multigrid/wk/run.sh
index 152144a..40ef66a 100644
--- a/multigrid/wk/run.sh
+++ b/multigrid/wk/run.sh
@@ -1,59 +1,59 @@
#
# @file run.sh
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
#!/bin/bash
EXE=/home/ttran/bsplines/multigrid/src/poisson_mg
TMP=/misc/multigrid
[ -e $TMP ] || mkdir -p $TMP
cat > in0 <.
#
# @authors
# (in alphabetical order)
# @author Trach-Minh Tran
#
#!/bin/sh
progname=$1
input_dir=$2
prog=$(basename ${progname})
input_file=${input_dir}/${prog}.in
${progname} < $input_file
exit $?
diff --git a/pppack/CMakeLists.txt b/pppack/CMakeLists.txt
index 948a8ea..1f42ad7 100644
--- a/pppack/CMakeLists.txt
+++ b/pppack/CMakeLists.txt
@@ -1,37 +1,37 @@
#
# @file CMakeLists.txt
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Nicolas Richart
# @author Trach-Minh Tran
#
set(SRCS
bvalue.f90
interv.f90
)
add_library(pppack STATIC ${SRCS})
install(TARGETS pppack
EXPORT ${BSPLINES_EXPORT_TARGETS}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
diff --git a/pppack/Makefile b/pppack/Makefile
index 0a5cbcf..2c83638 100644
--- a/pppack/Makefile
+++ b/pppack/Makefile
@@ -1,72 +1,72 @@
#
# @file Makefile
#
# @brief
#
# @copyright
# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
# SPC (Swiss Plasma Center)
#
-# spclibs is free software: you can redistribute it and/or modify it under
+# SPClibs is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
-# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see .
#
# @authors
# (in alphabetical order)
# @author Sébastien Jolliet
# @author Trach-Minh Tran
#
SRCS = banfac.f90 banslv.f90 bchfac.f90 bchslv.f90 bsplpp.f90 bsplvb.f90 \
bsplvd.f90 bspp2d.f90 bvalue.f90 chol1d.f90 colloc.f90 colpnt.f90 \
cspint.f90 cubset.f90 cubslo.f90 cubspl.f90 cwidth.f90 difequ.f90 \
dtblok.f90 eqblok.f90 evnnot.f90 factrb.f90 fcblok.f90 interv.f90 \
knots.f90 l2appr.f90 l2err.f90 l2knts.f90 newnot.f90 ppvalu.f90 \
putit.f90 rvec_print.f90 sbblok.f90 setupq.f90 shiftb.f90 slvblk.f90 \
smooth.f90 spli2d.f90 spline_hermite_set.f90 spline_hermite_val.f90 \
splint.f90 splopt.f90 subbak.f90 subfor.f90 tautsp.f90 titanium.f90
OBJS = banfac.o banslv.o bchfac.o bchslv.o bsplpp.o bsplvb.o bsplvd.o \
bspp2d.o bvalue.o chol1d.o colloc.o colpnt.o cspint.o cubset.o \
cubslo.o cubspl.o cwidth.o difequ.o dtblok.o eqblok.o evnnot.o \
factrb.o fcblok.o interv.o knots.o l2appr.o l2err.o l2knts.o newnot.o \
ppvalu.o putit.o rvec_print.o sbblok.o setupq.o shiftb.o slvblk.o \
smooth.o spli2d.o spline_hermite_set.o spline_hermite_val.o splint.o \
splopt.o subbak.o subfor.o tautsp.o titanium.o
OBJS = interv.o bvalue.o
LIBS =
CC = cc
CFLAGS = -g
FC = ifort
FFLAGS = $(OPT)
F90 = $(FC)
F90FLAGS = $(FFLAGS)
LDFLAGS =
lib: libpppack.a
libpppack.a: $(OBJS)
xiar r $@ $?
ranlib $@
clean:
rm -f *.o *.mod *~ core
distclean: clean
rm -f libpppack.a a.out
.SUFFIXES:
.SUFFIXES: .o .c .f90
.f90.o:
$(F90) $(F90FLAGS) -c $<
diff --git a/pppack/banfac.f90 b/pppack/banfac.f90
index 50ecb50..3aa668a 100644
--- a/pppack/banfac.f90
+++ b/pppack/banfac.f90
@@ -1,234 +1,234 @@
!>
!> @file banfac.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine banfac ( w, nroww, nrow, nbandl, nbandu, iflag )
!*************************************************************************
!
!! BANFAC factors a banded matrix without pivoting.
!
! Discussion:
!
! BANFAC returns in W the LU-factorization, without pivoting, of
! the banded matrix A of order NROW with (NBANDL+1+NBANDU) bands
! or diagonals in the work array W.
!
! Gauss elimination without pivoting is used. The routine is
! intended for use with matrices A which do not require row
! interchanges during factorization, especially for the totally
! positive matrices which occur in spline calculations.
!
! The matrix storage mode used is the same one used by LINPACK
! and LAPACK, and results in efficient innermost loops.
!
! Explicitly, A has
!
! NBANDL bands below the diagonal
! 1 main diagonal
! NBANDU bands above the diagonal
!
! and thus, with MIDDLE=NBANDU+1,
! A(I+J,J) is in W(I+MIDDLE,J) for I=-NBANDU,...,NBANDL, J=1,...,NROW.
!
! For example, the interesting entries of a banded matrix
! matrix of order 9, with NBANDL=1, NBANDU=2:
!
! 11 12 13 0 0 0 0 0 0
! 21 22 23 24 0 0 0 0 0
! 0 32 33 34 35 0 0 0 0
! 0 0 43 44 45 46 0 0 0
! 0 0 0 54 55 56 57 0 0
! 0 0 0 0 65 66 67 68 0
! 0 0 0 0 0 76 77 78 79
! 0 0 0 0 0 0 87 88 89
! 0 0 0 0 0 0 0 98 99
!
! would appear in the first 1+1+2=4 rows of W as follows:
!
! 0 0 13 24 35 46 57 68 79
! 0 12 23 34 45 56 67 78 89
! 11 22 33 44 55 66 77 88 99
! 21 32 43 54 65 76 87 98 0
!
! All other entries of W not identified in this way with an
! entry of A are never referenced.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input/output, real ( kind = 8 ) W(NROWW,NROW).
! On input, W contains the "interesting" part of a banded
! matrix A, with the diagonals or bands of A stored in the
! rows of W, while columns of A correspond to columns of W.
! On output, W contains the LU-factorization of A into a unit
! lower triangular matrix L and an upper triangular matrix U
! (both banded) and stored in customary fashion over the
! corresponding entries of A.
!
! This makes it possible to solve any particular linear system A*X=B
! for X by the call
!
! call banslv ( w, nroww, nrow, nbandl, nbandu, b )
!
! with the solution X contained in B on return.
!
! If IFLAG=2, then one of NROW-1, NBANDL, NBANDU failed to be nonnegative,
! or else one of the potential pivots was found to be zero
! indicating that A does not have an LU-factorization. This
! implies that A is singular in case it is totally positive.
!
! Input, integer NROWW, the row dimension of the work array W.
! NROWW must be at least NBANDL+1 + NBANDU.
!
! Input, integer NROW, the number of rows in A.
!
! Input, integer NBANDL, the number of bands of A below the main diagonal.
!
! Input, integer NBANDU, the number of bands of A above the main diagonal.
!
! Output, integer IFLAG, error flag.
! 1, success.
! 2, failure, the matrix was not factored.
!
implicit none
integer nrow
integer nroww
real ( kind = 8 ) factor
integer i
integer iflag
integer j
integer k
integer middle
integer nbandl
integer nbandu
real ( kind = 8 ) pivot
real ( kind = 8 ) w(nroww,nrow)
iflag = 1
if ( nrow < 1 ) then
iflag = 2
return
end if
!
! W(MIDDLE,*) contains the main diagonal of A.
!
middle = nbandu + 1
if ( nrow == 1 ) then
if ( w(middle,nrow) == 0.0D+00 ) then
iflag = 2
end if
return
end if
!
! A is upper triangular. Check that the diagonal is nonzero.
!
if ( nbandl <= 0 ) then
do i = 1, nrow-1
if ( w(middle,i) == 0.0D+00 ) then
iflag = 2
return
end if
end do
if ( w(middle,nrow) == 0.0D+00 ) then
iflag = 2
end if
return
!
! A is lower triangular. Check that the diagonal is nonzero and
! divide each column by its diagonal.
!
else if ( nbandu <= 0 ) then
do i = 1, nrow-1
pivot = w(middle,i)
if ( pivot == 0.0D+00 ) then
iflag = 2
return
end if
do j = 1, min ( nbandl, nrow-i )
w(middle+j,i) = w(middle+j,i) / pivot
end do
end do
return
end if
!
! A is not just a triangular matrix.
! Construct the LU factorization.
!
do i = 1, nrow-1
!
! W(MIDDLE,I) is the pivot for the I-th step.
!
if ( w(middle,i) == 0.0D+00 ) then
iflag = 2
write ( *, '(a)' ) ' '
write ( *, '(a)' ) 'BANFAC - Fatal error!'
write ( *, '(a,i6)' ) ' Zero pivot encountered in column ', i
stop
end if
!
! Divide each entry in column I below the diagonal by PIVOT.
!
do j = 1, min ( nbandl, nrow-i )
w(middle+j,i) = w(middle+j,i) / w(middle,i)
end do
!
! Subtract A(I,I+K)*(I-th column) from (I+K)-th column (below row I).
!
do k = 1, min ( nbandu, nrow-i )
factor = w(middle-k,i+k)
do j = 1, min ( nbandl, nrow-i )
w(middle-k+j,i+k) = w(middle-k+j,i+k) - w(middle+j,i) * factor
end do
end do
end do
!
! Check the last diagonal entry.
!
if ( w(middle,nrow) == 0.0D+00 ) then
iflag = 2
end if
return
end
diff --git a/pppack/banslv.f90 b/pppack/banslv.f90
index a8159de..b0e1891 100644
--- a/pppack/banslv.f90
+++ b/pppack/banslv.f90
@@ -1,112 +1,112 @@
!>
!> @file banslv.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine banslv ( w, nroww, nrow, nbandl, nbandu, b )
!*************************************************************************
!
!! BANSLV solves a banded linear system X * X = B factored by BANFAC.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) W(NROWW,NROW). W contains the banded matrix,
! after it has been factored by BANFAC.
!
! Input, integer NROWW, the row dimension of the work array W.
! NROWW must be at least NBANDL+1 + NBANDU.
!
! Input, integer NROW, the number of rows in A.
!
! Input, integer NBANDL, the number of bands of A below the
! main diagonal.
!
! Input, integer NBANDU, the number of bands of A above the
! main diagonal.
!
! Input/output, real ( kind = 8 ) B(NROW).
! On input, B contains the right hand side of the system to be solved.
! On output, B contains the solution, X.
!
implicit none
integer nrow
integer nroww
real ( kind = 8 ) b(nrow)
integer i
integer j
integer jmax
integer middle
integer nbandl
integer nbandu
real ( kind = 8 ) w(nroww,nrow)
middle = nbandu + 1
if ( nrow == 1 ) then
b(1) = b(1) / w(middle,1)
return
end if
!
! Forward pass
!
! For I = 1, 2, ..., NROW-1, subtract RHS(I)*(I-th column of L)
! from the right side, below the I-th row.
!
if ( 0 < nbandl ) then
do i = 1, nrow-1
jmax = min ( nbandl, nrow-i )
do j = 1, jmax
b(i+j) = b(i+j) - b(i) * w(middle+j,i)
end do
end do
end if
!
! Backward pass
!
! For I=NROW, NROW-1,...,1, divide RHS(I) by
! the I-th diagonal entry of U, then subtract
! RHS(I)*(I-th column of U) from right side, above the I-th row.
!
do i = nrow, 2, -1
b(i) = b(i) / w(middle,i)
do j = 1, min ( nbandu, i-1 )
b(i-j) = b(i-j) - b(i) * w(middle-j,i)
end do
end do
b(1) = b(1) / w(middle,1)
return
end
diff --git a/pppack/bchfac.f90 b/pppack/bchfac.f90
index c582ad0..67231c8 100644
--- a/pppack/bchfac.f90
+++ b/pppack/bchfac.f90
@@ -1,168 +1,168 @@
!>
!> @file bchfac.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine bchfac ( w, nbands, nrow, diag )
!*************************************************************************
!
!! BCHFAC constructs a Cholesky factorization of a matrix.
!
! Discussion:
!
! The factorization has the form
!
! C = L * D * L'
!
! with L unit lower triangular and D diagonal, for a given matrix C of
! order NROW, where C is symmetric positive semidefinite and banded,
! having NBANDS diagonals at and below the main diagonal.
!
! Gauss elimination is used, adapted to the symmetry and bandedness of C.
!
! Near-zero pivots are handled in a special way. The diagonal
! element C(N,N)=W(1,N) is saved initially in DIAG(N), all N.
!
! At the N-th elimination step, the current pivot element, W(1,N),
! is compared with its original value, DIAG(N). If, as the result
! of prior elimination steps, this element has been reduced by about
! a word length, (i.e., if W(1,N)+DIAG(N) <= DIAG(N)), then the pivot
! is declared to be zero, and the entire N-th row is declared to
! be linearly dependent on the preceding rows. This has the effect
! of producing X(N) = 0 when solving C*X = B for X, regardless of B.
!
! Justification for this is as follows. In contemplated applications
! of this program, the given equations are the normal equations for
! some least-squares approximation problem, DIAG(N) = C(N,N) gives
! the norm-square of the N-th basis function, and, at this point,
! W(1,N) contains the norm-square of the error in the least-squares
! approximation to the N-th basis function by linear combinations
! of the first N-1.
!
! Having W(1,N)+DIAG(N) <= DIAG(N) signifies that the N-th function
! is linearly dependent to machine accuracy on the first N-1
! functions, therefore can safely be left out from the basis of
! approximating functions.
!
! The solution of a linear system C*X=B is effected by the
! succession of the following two calls:
!
! CALL BCHFAC(W,NBANDS,NROW,DIAG)
!
! CALL BCHSLV(W,NBANDS,NROW,B,X)
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input/output, real ( kind = 8 ) W(NBANDS,NROW).
!
! On input, W contains the NBANDS diagonals in its rows,
! with the main diagonal in row 1. Precisely, W(I,J)
! contains C(I+J-1,J), I=1,...,NBANDS, J=1,...,NROW.
!
! For example, the interesting entries of a seven diagonal
! symmetric matrix C of order 9 would be stored in W as
!
! 11 22 33 44 55 66 77 88 99
! 21 32 43 54 65 76 87 98 *
! 31 42 53 64 75 86 97 * *
! 41 52 63 74 85 96 * * *
!
! Entries of the array not associated with an
! entry of C are never referenced.
!
! On output, W contains the Cholesky factorization
! C = L*D*L-transp, with W(1,I) containing 1/D(I,I) and W(I,J)
! containing L(I-1+J,J), I=2,...,NBANDS.
!
! Input, integer NBANDS, indicates the bandwidth of the
! matrix C, i.e., C(I,J) = 0 for NBANDS < ABS(I-J).
!
! Input, integer NROW, is the order of the matrix C.
!
! Work array, real ( kind = 8 ) DIAG(NROW).
!
implicit none
integer nbands
integer nrow
real ( kind = 8 ) diag(nrow)
integer i
integer imax
integer j
integer jmax
integer n
real ( kind = 8 ) ratio
real ( kind = 8 ) w(nbands,nrow)
if ( nrow <= 1 ) then
if ( 0.0D+00 < w(1,1) ) then
w(1,1) = 1.0D+00 / w(1,1)
end if
return
end if
!
! Store the diagonal.
!
diag(1:nrow) = w(1,1:nrow)
!
! Factorization.
!
do n = 1, nrow
if ( w(1,n) + diag(n) <= diag(n) ) then
w(1:nbands,n) = 0.0D+00
else
w(1,n) = 1.0D+00 / w(1,n)
imax = min ( nbands-1, nrow-n )
jmax = imax
do i = 1, imax
ratio = w(i+1,n) * w(1,n)
do j = 1, jmax
w(j,n+i) = w(j,n+i) - w(j+i,n) * ratio
end do
jmax = jmax-1
w(i+1,n) = ratio
end do
end if
end do
return
end
diff --git a/pppack/bchslv.f90 b/pppack/bchslv.f90
index 503e3e1..93e9745 100644
--- a/pppack/bchslv.f90
+++ b/pppack/bchslv.f90
@@ -1,114 +1,114 @@
!>
!> @file bchslv.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine bchslv ( w, nbands, nrow, b )
!*************************************************************************
!
!! BCHSLV solves a banded symmetric positive definite system.
!
! Discussion:
!
! The system is of the form:
!
! C * X = B
!
! and the Cholesky factorization of C has been constructed
! by BCHFAC.
!
! With the factorization
!
! C = L * D * L'
!
! available, where L is unit lower triangular and D is diagonal,
! the triangular system
!
! L * Y = B
!
! is solved for Y (forward substitution), Y is stored in B, the
! vector D**(-1)*Y is computed and stored in B, then the
! triangular system L'*X = D**(-1)*Y is solved for X
! (backsubstitution).
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) W(NBANDS,NROW), the Cholesky factorization for C,
! as computed by BCHFAC.
!
! Input, integer NBANDS, the bandwidth of C.
!
! Input, integer NROW, the order of the matrix C.
!
! Input/output, real ( kind = 8 ) B(NROW).
! On input, the right hand side.
! On output, the solution.
!
implicit none
integer nbands
integer nrow
real ( kind = 8 ) b(nrow)
integer j
integer n
real ( kind = 8 ) w(nbands,nrow)
if ( nrow <= 1 ) then
b(1) = b(1) * w(1,1)
return
end if
!
! Forward substitution.
! Solve L*Y=B.
!
do n = 1, nrow
do j = 1, min(nbands-1,nrow-n)
b(j+n) = b(j+n) - w(j+1,n) * b(n)
end do
end do
!
! Backsubstitution.
! Solve L'*X=D**(-1)*Y.
!
do n = nrow, 1, -1
b(n) = b(n)*w(1,n)
do j = 1, min(nbands-1,nrow-n)
b(n) = b(n) - w(j+1,n) * b(j+n)
end do
end do
return
end
diff --git a/pppack/bsplpp.f90 b/pppack/bsplpp.f90
index 41b76bd..819f296 100644
--- a/pppack/bsplpp.f90
+++ b/pppack/bsplpp.f90
@@ -1,165 +1,165 @@
!>
!> @file bsplpp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine bsplpp ( t, bcoef, n, k, scrtch, break, coef, l )
!*************************************************************************
!
!! BSPLPP converts from B-spline to piecewise polynomial form.
!
! Discussion:
!
! The B-spline representation of a spline is ( T, BCOEF, N, K ),
! while the piecewise polynomial representation is
! ( BREAK, COEF, L, K ).
!
! For each breakpoint interval, the K relevant B-spline coefficients
! of the spline are found and then differenced repeatedly to get the
! B-spline coefficients of all the derivatives of the spline on that
! interval.
!
! The spline and its first K-1 derivatives are then evaluated at the
! left end point of that interval, using BSPLVB repeatedly to obtain
! the values of all B-splines of the appropriate order at that point.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) T(N+K), the knot sequence.
!
! Input, real ( kind = 8 ) BCOEF(N), the B spline coefficient sequence.
!
! Input, integer N, the number of B spline coefficients.
!
! Input, integer K, the order of the spline.
!
! Work array, real ( kind = 8 ) SCRTCH(K,K).
!
! Output, real ( kind = 8 ) BREAK(L+1), the piecewise polynomial breakpoint
! sequence. BREAK contains the distinct points in the
! sequence T(K),...,T(N+1)
!
! Output, real ( kind = 8 ) COEF(K,N), with COEF(I,J) = (I-1)st derivative
! of the spline at BREAK(J) from the right.
!
! Output, integer L, the number of polynomial pieces which
! make up the spline in the interval (T(K),T(N+1)).
!
implicit none
integer k
integer l
integer n
real ( kind = 8 ) bcoef(n)
real ( kind = 8 ) biatx(k)
real ( kind = 8 ) break(*)
real ( kind = 8 ) coef(k,n)
real ( kind = 8 ) diff
integer i
integer j
integer jp1
integer left
integer lsofar
real ( kind = 8 ) scrtch(k,k)
real ( kind = 8 ) sum1
real ( kind = 8 ) t(n+k)
lsofar = 0
break(1) = t(k)
do left = k, n
!
! Find the next nontrivial knot interval.
!
if ( t(left+1) == t(left) ) then
cycle
end if
lsofar = lsofar + 1
break(lsofar+1) = t(left+1)
if ( k <= 1 ) then
coef(1,lsofar) = bcoef(left)
cycle
end if
!
! Store the K B-spline coefficients relevant to current knot
! interval in SCRTCH(*,1).
!
do i = 1, k
scrtch(i,1) = bcoef(left-k+i)
end do
!
! For j=1,...,k-1, compute the k-j b-spline coefficients relevant to
! current knot interval for the j-th derivative by differencing
! those for the (j-1)st derivative, and store in scrtch(.,j+1) .
!
do jp1 = 2, k
j = jp1-1
do i = 1, k-j
diff = t(left+i)-t(left+i-(k-j))
if ( 0.0D+00 < diff ) then
scrtch(i,jp1)=((scrtch(i+1,j)-scrtch(i,j)) / diff ) &
* real ( k - j, kind = 8 )
end if
end do
end do
!
! For J=0, ..., K-1, find the values at T(left) of the j+1
! B-splines of order J+1 whose support contains the current
! knot interval from those of order J (in biatx ), then comb-
! ine with the B-spline coefficients (in scrtch(.,k-j) ) found earlier
! to compute the (k-j-1)st derivative at t(left) of the given
! spline.
!
call bsplvb ( t, 1, 1, t(left), left, biatx )
coef(k,lsofar) = scrtch(1,k)
do jp1 = 2, k
call bsplvb ( t, jp1, 2, t(left), left, biatx )
sum1 = 0.0D+00
do i = 1, jp1
sum1 = sum1 + biatx(i) * scrtch(i,k+1-jp1)
end do
coef(k+1-jp1,lsofar) = sum1
end do
end do
l = lsofar
return
end
diff --git a/pppack/bsplvb.f90 b/pppack/bsplvb.f90
index 89d9578..54e9460 100644
--- a/pppack/bsplvb.f90
+++ b/pppack/bsplvb.f90
@@ -1,170 +1,170 @@
!>
!> @file bsplvb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine bsplvb ( t, jhigh, index, x, left, biatx )
!*************************************************************************
!
!! BSPLVB evaluates B-splines at a point X with a given knot sequence.
!
! Discusion:
!
! BSPLVB evaluates all possibly nonzero B-splines at X of order
!
! JOUT = MAX ( JHIGH, (J+1)*(INDEX-1) )
!
! with knot sequence T.
!
! The recurrence relation
!
! X - T(I) T(I+J+1) - X
! B(I,J+1)(X) = ----------- * B(I,J)(X) + --------------- * B(I+1,J)(X)
! T(I+J)-T(I) T(I+J+1)-T(I+1)
!
! is used to generate B(LEFT-J:LEFT,J+1)(X) from B(LEFT-J+1:LEFT,J)(X)
! storing the new values in BIATX over the old.
!
! The facts that
!
! B(I,1)(X) = 1 if T(I) <= X < T(I+1)
!
! and that
!
! B(I,J)(X) = 0 unless T(I) <= X < T(I+J)
!
! are used.
!
! The particular organization of the calculations follows
! algorithm 8 in chapter X of the text.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) T(LEFT+JOUT), the knot sequence. T is assumed to
! be nondecreasing, and also, T(LEFT) must be strictly less than
! T(LEFT+1).
!
! Input, integer JHIGH, INDEX, determine the order
! JOUT = MAX(JHIGH,(J+1)*(INDEX-1))
! of the B-splines whose values at X are to be returned.
! INDEX is used to avoid recalculations when several
! columns of the triangular array of B-spline values are
! needed, for example, in BVALUE or in BSPLVD.
!
! If INDEX = 1, the calculation starts from scratch and the entire
! triangular array of B-spline values of orders
! 1, 2, ...,JHIGH is generated order by order, i.e.,
! column by column.
!
! If INDEX = 2, only the B-spline values of order J+1, J+2, ..., JOUT
! are generated, the assumption being that BIATX, J,
! DELTAL, DELTAR are, on entry, as they were on exit
! at the previous call. In particular, if JHIGH = 0,
! then JOUT = J+1, i.e., just the next column of B-spline
! values is generated.
!
! WARNING: the restriction JOUT <= JMAX (= 20) is
! imposed arbitrarily by the dimension statement for DELTAL
! and DELTAR, but is nowhere checked for.
!
! Input, real ( kind = 8 ) X, the point at which the B-splines
! are to be evaluated.
!
! Input, integer LEFT, an integer chosen so that
! T(LEFT) <= X <= T(LEFT+1).
!
! Output, real ( kind = 8 ) BIATX(JOUT), with BIATX(I) containing the
! value at X of the polynomial of order JOUT which agrees
! with the B-spline B(LEFT-JOUT+I,JOUT,T) on the interval
! (T(LEFT),T(LEFT+1)).
!
implicit none
integer, parameter :: jmax = 20
integer jhigh
real ( kind = 8 ) biatx(jhigh)
!!$ real ( kind = 8 ), save, dimension ( jmax ) :: deltal
!!$ real ( kind = 8 ), save, dimension ( jmax ) :: deltar
real ( kind = 8 ), dimension ( jmax ) :: deltal
real ( kind = 8 ), dimension ( jmax ) :: deltar
integer i
integer index
!!$ integer, save :: j = 1
integer :: j
integer left
real ( kind = 8 ) saved
real ( kind = 8 ) t(left+jhigh)
real ( kind = 8 ) term
real ( kind = 8 ) x
! Forces starting always from scratch!
!!$ if ( index == 1 ) then
j = 1
biatx(1) = 1.0D+00
if ( jhigh <= j ) then
return
end if
!!$ end if
if ( t(left+1) <= t(left) ) then
write ( *, '(a)' ) ' '
write ( *, '(a)' ) 'BSPLVB - Fatal error!'
write ( *, '(a)' ) ' It is required that T(LEFT) < T(LEFT+1).'
write ( *, '(a,i6)' ) ' But LEFT = ', left
write ( *, '(a,g14.6)' ) ' T(LEFT) = ', t(left)
write ( *, '(a,g14.6)' ) ' T(LEFT+1) = ', t(left+1)
stop
end if
do
deltar(j) = t(left+j) - x
deltal(j) = x - t(left+1-j)
saved = 0.0D+00
do i = 1, j
term = biatx(i) / ( deltar(i) + deltal(j+1-i) )
biatx(i) = saved + deltar(i) * term
saved = deltal(j+1-i) * term
end do
biatx(j+1) = saved
j = j + 1
if ( jhigh <= j ) then
exit
end if
end do
return
end
diff --git a/pppack/bsplvd.f90 b/pppack/bsplvd.f90
index 82d203c..e4b831f 100644
--- a/pppack/bsplvd.f90
+++ b/pppack/bsplvd.f90
@@ -1,189 +1,189 @@
!>
!> @file bsplvd.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine bsplvd ( t, k, x, left, a, dbiatx, nderiv )
!*************************************************************************
!
!! BSPLVD calculates the nonvanishing B-splines and derivatives at X.
!
! Discussion:
!
! Values at X of all the relevant B-splines of order K, K-1,..., K+1-NDERIV
! are generated via BSPLVB and stored temporarily in DBIATX.
!
! Then, the B-spline coefficients of the required derivatives
! of the B-splines of interest are generated by differencing,
! each from the preceding one of lower order, and combined with
! the values of B-splines of corresponding order in DBIATX
! to produce the desired values.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) T(LEFT+K), the knot sequence. It is assumed that
! T(LEFT) < T(LEFT+1). Also, the output is correct only if
! T(LEFT) <= X <= T(LEFT+1) .
!
! Input, integer K, the order of the B-splines to be evaluated.
!
! Input, real ( kind = 8 ) X, the point at which these values are sought.
!
! Input, integer LEFT, indicates the left endpoint of the interval of
! interest. The K B-splines whose support contains the interval
! (T(LEFT), T(LEFT+1)) are to be considered.
!
! Workspace, real ( kind = 8 ) A(K,K).
!
! Output, real ( kind = 8 ) DBIATX(K,NDERIV). DBIATX(I,M) contains
! the value of the (M-1)st derivative of the (LEFT-K+I)-th B-spline
! of order K for knot sequence T, I=M,...,K, M=1,...,NDERIV.
!
! Input, integer NDERIV, indicates that values of
! B-splines and their derivatives up to but not
! including the NDERIV-th are asked for.
!
implicit none
integer k
integer left
integer nderiv
real ( kind = 8 ) a(k,k)
real ( kind = 8 ) dbiatx(k,nderiv)
real ( kind = 8 ) factor
real ( kind = 8 ) fkp1mm
integer i
integer ideriv
integer il
integer j
integer jlow
integer jp1mid
integer ldummy
integer m
integer mhigh
real ( kind = 8 ) sum1
real ( kind = 8 ) t(left+k)
real ( kind = 8 ) x
mhigh = max ( min ( nderiv, k ), 1 )
!
! MHIGH is usually equal to nderiv.
!
call bsplvb ( t, k+1-mhigh, 1, x, left, dbiatx )
if ( mhigh == 1 ) then
return
end if
!
! The first column of DBIATX always contains the B-spline values
! for the current order. These are stored in column K+1-current
! order before BSPLVB is called to put values for the next
! higher order on top of it.
!
ideriv = mhigh
do m = 2, mhigh
jp1mid = 1
do j = ideriv, k
dbiatx(j,ideriv) = dbiatx(jp1mid,1)
jp1mid = jp1mid+1
end do
ideriv = ideriv-1
call bsplvb(t,k+1-ideriv,2,x,left,dbiatx)
end do
!
! At this point, b(left-k+i, k+1-j)(x) is in dbiatx(i,j) for
! i=j,...,k and j=1,...,mhigh ('=' nderiv). in particular, the
! first column of dbiatx is already in final form. to obtain cor-
! ??? LOST A LINE ???
! rate their b-repr. by differencing, then evaluate at x.
!
jlow = 1
do i = 1, k
do j = jlow,k
a(j,i) = 0.0D+00
end do
jlow = i
a(i,i) = 1.0D+00
end do
!
! At this point, a(.,j) contains the b-coefficients for the J-th of the
! k b-splines of interest here.
!
do m = 2, mhigh
fkp1mm = real ( k + 1 - m, kind = 8 )
il = left
i = k
!
! For j=1,...,k, construct b-coefficients of (m-1)st derivative of
! b-splines from those for preceding derivative by differencing
! and store again in a(.,j) . The fact that a(i,j)=0 for
! i < j is used.
!
do ldummy = 1, k+1-m
factor = fkp1mm/(t(il+k+1-m)-t(il))
!
! The assumption that t(left) < t(left+1) makes denominator
! in factor nonzero.
!
do j = 1, i
a(i,j) = (a(i,j)-a(i-1,j))*factor
end do
il = il-1
i = i-1
end do
!
! For i=1,...,k, combine b-coefficients a(.,i) with B-spline values
! stored in dbiatx(.,m) to get value of (m-1)st derivative of
! i-th b-spline (of interest here) at x , and store in
! dbiatx(i,m). storage of this value over the value of a b-spline
! of order m there is safe since the remaining b-spline derivat-
! ives of the same order do not use this value due to the fact
! that a(j,i)=0 for j < i.
!
do i = 1, k
sum1 = 0.0D+00
jlow = max(i,m)
do j = jlow,k
sum1 = sum1 + a(j,i) * dbiatx(j,m)
end do
dbiatx(i,m) = sum1
end do
end do
return
end
diff --git a/pppack/bspp2d.f90 b/pppack/bspp2d.f90
index 59db260..e50c5da 100644
--- a/pppack/bspp2d.f90
+++ b/pppack/bspp2d.f90
@@ -1,203 +1,203 @@
!>
!> @file bspp2d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine bspp2d ( t, bcoef, n, k, m, scrtch, break, coef, l )
!*************************************************************************
!
!! BSPP2D converts from B-spline to piecewise polynomial representation.
!
! Discussion:
!
! The B-spline representation
!
! T, BCOEF(.,J), N, K
!
! is converted to its piecewise polynomial representation
!
! BREAK, COEF(J,.,.), L, K, J=1, ..., M.
!
! This is an extended version of BSPLPP for use with tensor products.
!
! For each breakpoint interval, the K relevant B-spline
! coefficients of the spline are found and then differenced
! repeatedly to get the B-spline coefficients of all the
! derivatives of the spline on that interval.
!
! The spline and its first K-1 derivatives are then evaluated
! at the left endpoint of that interval, using BSPLVB
! repeatedly to obtain the values of all B-splines of the
! appropriate order at that point.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) T(N+K), the knot sequence.
!
! Input, real ( kind = 8 ) BCOEF(N,M). For each J, B(*,J) is the
! B-spline coefficient sequence, of length N.
!
! Input, integer N, the length of BCOEF.
!
! Input, integer K, the order of the spline.
!
! Input, integer M, the number of data sets.
!
! Work array, real ( kind = 8 ) SCRTCH(K,K,M).
!
! Output, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence
! containing the distinct points in the sequence T(K),...,T(N+1)
!
! Output, real ( kind = 8 ) COEF(M,K,N), with COEF(MM,I,J) = the (I-1)st
! derivative of the MM-th spline at BREAK(J) from the right, MM=1, ..., M.
!
! Output, integer L, the number of polynomial pieces which make up the
! spline in the interval (T(K), T(N+1)).
!
implicit none
integer k
integer m
integer n
real ( kind = 8 ) bcoef(n,m)
real ( kind = 8 ) biatx(k)
real ( kind = 8 ) break(*)
real ( kind = 8 ) coef(m,k,*)
real ( kind = 8 ) diff
real ( kind = 8 ) fkmj
integer i
integer j
integer jp1
integer kmj
integer l
integer left
integer lsofar
integer mm
real ( kind = 8 ) scrtch(k,k,m)
real ( kind = 8 ) sum1
real ( kind = 8 ) t(n+k)
lsofar = 0
break(1) = t(k)
do left = k, n
!
! Find the next nontrivial knot interval.
!
if ( t(left+1) == t(left) ) then
cycle
end if
lsofar = lsofar+1
break(lsofar+1) = t(left+1)
if ( k <= 1 ) then
do mm = 1, m
coef(mm,1,lsofar) = bcoef(left,mm)
end do
cycle
end if
!
! Store the K b-spline coefficients relevant to current knot interval
! in scrtch(.,1) .
!
do i = 1, k
do mm = 1, m
scrtch(i,1,mm) = bcoef(left-k+i,mm)
end do
end do
!
! for j=1,...,k-1, compute the k-j b-spline coefficients relevant to
! current knot interval for the j-th derivative by differencing
! those for the (j-1)st derivative, and store in scrtch(.,j+1) .
!
do jp1 = 2, k
j = jp1-1
kmj = k-j
fkmj = real ( k - j, kind = 8 )
do i = 1, k-j
diff = (t(left+i)-t(left+i-kmj))/fkmj
if ( 0.0D+00 < diff ) then
do mm = 1, m
scrtch(i,jp1,mm)=(scrtch(i+1,j,mm)-scrtch(i,j,mm))/diff
end do
end if
end do
end do
!
! For j=0, ..., k-1, find the values at T(left) of the j+1
! b-splines of order j+1 whose support contains the current
! knot interval from those of order j (in biatx ), then comb-
! ine with the b-spline coefficients (in scrtch(.,k-j) ) found earlier
! to compute the (k-j-1)st derivative at t(left) of the given
! spline.
!
call bsplvb ( t, 1, 1, t(left), left, biatx )
do mm = 1, m
coef(mm,k,lsofar) = scrtch(1,k,mm)
end do
do jp1 = 2, k
call bsplvb (t,jp1,2,t(left),left,biatx)
kmj = k+1-jp1
do mm = 1, m
sum1 = 0.0D+00
do i = 1, jp1
sum1 = sum1 + biatx(i) * scrtch(i,kmj,mm)
end do
coef(mm,kmj,lsofar) = sum1
end do
end do
end do
l = lsofar
return
end
diff --git a/pppack/bvalue.f90 b/pppack/bvalue.f90
index ead864f..04af9b5 100644
--- a/pppack/bvalue.f90
+++ b/pppack/bvalue.f90
@@ -1,226 +1,226 @@
!>
!> @file bvalue.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
function bvalue ( t, bcoef, n, k, x, jderiv )
!*************************************************************************
!
!! BVALUE evaluates a derivative of a spline from its B-spline representation.
!
! Discussion:
!
! The spline is taken to be continuous from the right.
!
! The nontrivial knot interval (T(I),T(I+1)) containing X is
! located with the aid of INTERV. The K B-spline coefficients
! of F relevant for this interval are then obtained from BCOEF,
! or are taken to be zero if not explicitly available, and are
! then differenced JDERIV times to obtain the B-spline
! coefficients of (D**JDERIV)F relevant for that interval.
!
! Precisely, with J = JDERIV, we have from X.(12) of the text that:
!
! (D**J)F = sum ( BCOEF(.,J)*B(.,K-J,T) )
!
! where
! / BCOEF(.), , J == 0
! /
! BCOEF(.,J) = / BCOEF(.,J-1) - BCOEF(.-1,J-1)
! / -----------------------------, 0 < J
! / (T(.+K-J) - T(.))/(K-J)
!
! Then, we use repeatedly the fact that
!
! sum ( A(.)*B(.,M,T)(X) ) = sum ( A(.,X)*B(.,M-1,T)(X) )
!
! with
! (X - T(.))*A(.) + (T(.+M-1) - X)*A(.-1)
! A(.,X) = ---------------------------------------
! (X - T(.)) + (T(.+M-1) - X)
!
! to write (D**J)F(X) eventually as a linear combination of
! B-splines of order 1, and the coefficient for B(I,1,T)(X)
! must then be the desired number (D**J)F(X).
! See x.(17)-(19) of text.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) T(N+K), the knot sequence. T is assumed
! to be nondecreasing.
!
! Input, real ( kind = 8 ) BCOEF(N), B-spline coefficient sequence.
!
! Input, integer N, the length of BCOEF.
!
! Input, integer K, the order of the spline.
!
! Input, real ( kind = 8 ) X, the point at which to evaluate.
!
! Input, integer JDERIV, the order of the derivative to
! be evaluated. JDERIV is assumed to be zero or positive.
!
! Output, real ( kind = 8 ) BVALUE, the value of the (JDERIV)-th
! derivative of the spline at X.
!
implicit none
integer k
integer n
real ( kind = 8 ) aj(k)
real ( kind = 8 ) bcoef(n)
real ( kind = 8 ) bvalue
real ( kind = 8 ) dl(k)
real ( kind = 8 ) dr(k)
integer i
integer ilo
integer j
integer jc
integer jcmax
integer jcmin
integer jderiv
integer jj
integer mflag
real ( kind = 8 ) t(n+k)
real ( kind = 8 ) x
bvalue = 0.0D+00
if ( k <= jderiv ) then
return
end if
!
! Find I so that 1 <= i < n+k and t(i) < t(i+1) and t(i) <= x < t(i+1).
!
! If no such i can be found, X lies
! outside the support of the spline F and bvalue=0.
! (the asymmetry in this choice of i makes F rightcontinuous)
!
call interv ( t, n+k, x, i, mflag )
if ( mflag /= 0 ) then
return
end if
!
! If K=1 (and jderiv = 0), bvalue = bcoef(i).
!
if ( k <= 1 ) then
bvalue = bcoef(i)
return
end if
!
! Store the K b-spline coefficients relevant for the knot interval
! (T(i),T(i+1)) in aj(1),...,aj(k) and compute dl(j)=x-t(i+1-j),
! dr(j)=T(i+j)-x, j=1,...,k-1 . set any of the aj not obtainable
! from input to zero. Set any T's not obtainable equal to T(1) or
! to T(n+k) appropriately.
!
jcmin = 1
if ( k <= i ) then
do j = 1, k-1
dl(j) = x-t(i+1-j)
end do
else
jcmin = 1-(i-k)
do j = 1, i
dl(j) = x-t(i+1-j)
end do
do j = i, k-1
aj(k-j) = 0.0D+00
dl(j) = dl(i)
end do
end if
jcmax = k
if ( i <= n ) then
go to 90
end if
jcmax = k + n - i
do j = 1, k+n-i
dr(j) = t(i+j)-x
end do
do j = k+n-i, k-1
aj(j+1) = 0.0D+00
dr(j) = dr(k+n-i)
end do
go to 110
90 continue
do j = 1, k-1
dr(j) = t(i+j)-x
end do
110 continue
do jc = jcmin, jcmax
aj(jc) = bcoef(i-k+jc)
end do
!
! Difference the coefficients JDERIV times.
!
do j = 1, jderiv
ilo = k-j
do jj = 1, k-j
aj(jj) = ((aj(jj+1)-aj(jj))/(dl(ilo)+dr(jj))) * real ( k - j, kind = 8 )
ilo = ilo-1
end do
end do
!
! Compute value at X in (t(i),t(i+1)) of jderiv-th derivative,
! given its relevant b-spline coefficients in aj(1),...,aj(k-jderiv).
!
do j = jderiv+1, k-1
ilo = k-j
do jj = 1, k-j
aj(jj) = ( aj(jj+1) * dl(ilo) + aj(jj) * dr(jj) ) &
/ ( dl(ilo) + dr(jj) )
ilo = ilo-1
end do
end do
bvalue = aj(1)
return
end
diff --git a/pppack/chol1d.f90 b/pppack/chol1d.f90
index 8a0e19b..4fdfbb4 100644
--- a/pppack/chol1d.f90
+++ b/pppack/chol1d.f90
@@ -1,146 +1,146 @@
!>
!> @file chol1d.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine chol1d ( p, v, qty, npoint, u, qu )
!*************************************************************************
!
!! CHOL1D sets up and solves linear systems needed by SMOOTH.
!
! Discussion:
!
! This routine constructs the upper three diagonals of
!
! V(I,J), I=2 to NPOINT-1, J=1,3,
!
! of the matrix
!
! 6*(1-P)*Q-transpose*(D**2)*Q + P*R.
!
! It then computes its L*L' decomposition and stores it also
! in V, then applies forward and backsubstitution to the right side
!
! Q'*Y
!
! in QTY to obtain the solution in U.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) P, ?
!
! ?put, real ( kind = 8 ) V(NPOINT,7), ?
!
! ?put, real ( kind = 8 ) QTY(NPOINT), ?
!
! Input, integer NPOINT, ?
!
! Output, real ( kind = 8 ) U(NPOINT), the solution.
!
! Output, real ( kind = 8 ) QU(NPOINT), the value of Q * U.
!
implicit none
integer npoint
integer i
real ( kind = 8 ) p
real ( kind = 8 ) qty(npoint)
real ( kind = 8 ) qu(npoint)
real ( kind = 8 ) u(npoint)
real ( kind = 8 ) v(npoint,7)
real ( kind = 8 ) prev
real ( kind = 8 ) ratio
real ( kind = 8 ) six1mp
real ( kind = 8 ) twop
!
! Construct 6*(1-p)*q'*(d**2)*q + p*r
!
six1mp = 6.0D+00 * ( 1.0D+00 - p )
twop = 2.0D+00 * p
do i = 2, npoint-1
v(i,1) = six1mp * v(i,5)+twop*(v(i-1,4)+v(i,4))
v(i,2) = six1mp * v(i,6)+p*v(i,4)
v(i,3) = six1mp * v(i,7)
end do
if ( npoint < 4 ) then
u(1) = 0.0D+00
u(2) = qty(2) / v(2,1)
u(3) = 0.0D+00
!
! Factorization
!
else
do i = 2, npoint-2
ratio = v(i,2)/v(i,1)
v(i+1,1) = v(i+1,1)-ratio*v(i,2)
v(i+1,2) = v(i+1,2)-ratio*v(i,3)
v(i,2) = ratio
ratio = v(i,3)/v(i,1)
v(i+2,1) = v(i+2,1)-ratio*v(i,3)
v(i,3) = ratio
end do
!
! Forward substitution
!
u(1) = 0.0D+00
v(1,3) = 0.0D+00
u(2) = qty(2)
do i = 2, npoint-2
u(i+1) = qty(i+1)-v(i,2)*u(i)-v(i-1,3)*u(i-1)
end do
!
! Back substitution.
!
u(npoint) = 0.0D+00
u(npoint-1) = u(npoint-1) / v(npoint-1,1)
do i = npoint-2, 2, -1
u(i) = u(i)/v(i,1)-u(i+1)*v(i,2)-u(i+2)*v(i,3)
end do
end if
!
! Construct Q*U.
!
prev = 0.0D+00
do i = 2, npoint
qu(i) = (u(i)-u(i-1))/v(i-1,4)
qu(i-1) = qu(i)-prev
prev = qu(i)
end do
qu(npoint) = -qu(npoint)
return
end
diff --git a/pppack/colloc.f90 b/pppack/colloc.f90
index df60a0e..26b0031 100644
--- a/pppack/colloc.f90
+++ b/pppack/colloc.f90
@@ -1,275 +1,275 @@
!>
!> @file colloc.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine colloc ( aleft, aright, lbegin, iorder, ntimes, addbrk, &
relerr )
!*************************************************************************
!
!! COLLOC solves an ordinary differential equation by collocation.
!
! Method:
!
! The M-th order ordinary differential equation with M side
! conditions, to be specified in subroutine DIFEQU, is solved
! approximately by collocation.
!
! The approximation F to the solution G is piecewise polynomial of order
! k+m with L pieces and M-1 continuous derivatives. F is determined by
! the requirement that it satisfy the differential equation at K points
! per interval (to be specified in COLPNT ) and the M side conditions.
!
! This usually nonlinear system of equations for f is solved by
! Newton's method. the resulting linear system for the b-coefficients of an
! iterate is constructed appropriately in eqblok and then solved
! in slvblk, a program designed to solve almost block
! diagonal linear systems efficiently.
!
! There is an opportunity to attempt improvement of the breakpoint
! sequence (both in number and location) through use of NEWNOT.
!
! Printed output consists of the pp-representation of the approximate
! solution, and of the error at selected points.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) ALEFT, ARIGHT, the endpoints of the interval.
!
! Input, integer LBEGIN, the initial number of polynomial pieces
! in the approximation. A uniform breakpoint sequence will be chosen.
!
! Input, integer IORDER, the order of the polynomial pieces to be
! used in the approximation
!
! Input, integer NTIMES, the number of passes to be made through NEWNOT.
!
! addbrk the number (possibly fractional) of breaks to be added per
! pass through newnot. e.g., if addbrk=.33334, then a break-
! point will be added at every third pass through newnot.
!
! relerr a tolerance. Newton iteration is stopped if the difference
! between the b-coefficients of two successive iterates is no more
! than relerr*(absolute largest b-coefficient).
!
implicit none
integer, parameter :: npiece = 100
integer, parameter :: ndim = 200
integer, parameter :: ncoef = 2000
integer, parameter :: lenblk = 2000
real ( kind = 8 ) a(ndim)
real ( kind = 8 ) addbrk
real ( kind = 8 ) aleft
real ( kind = 8 ) amax
real ( kind = 8 ) aright
real ( kind = 8 ) asave(ndim)
real ( kind = 8 ) b(ndim)
real ( kind = 8 ) bloks(lenblk)
real ( kind = 8 ) break
real ( kind = 8 ) coef
real ( kind = 8 ) dx
real ( kind = 8 ) err
integer i
integer iflag
integer ii
integer integs(3,npiece)
integer iorder
integer iside
integer itemps(ndim)
integer iter
integer itermx
integer j
integer k
integer kpm
integer l
integer lbegin
integer lnew
integer m
integer n
integer nbloks
integer nt
integer ntimes
real ( kind = 8 ) relerr
real ( kind = 8 ) rho
real ( kind = 8 ) t(ndim)
real ( kind = 8 ) templ(lenblk)
real ( kind = 8 ) temps(ndim)
real ( kind = 8 ) xside
equivalence (bloks,templ)
common /approx/ break(npiece),coef(ncoef),l,kpm
common /side/ m,iside,xside(10)
common /other/ itermx,k,rho(19)
kpm = iorder
if ( ncoef < lbegin * kpm ) then
go to 120
end if
!
! Set the various parameters concerning the particular dif.equ.
! including a first approximation in case the de is to be solved by
! iteration ( 0 < itermx ).
!
call difequ ( 1, temps(1), temps )
!
! Obtain the K collocation points for the standard interval.
!
k = kpm-m
call colpnt(k,rho)
!
! The following five statements could be replaced by a read in or-
! der to obtain a specific (nonuniform) spacing of the breakpnts.
!
dx = (aright-aleft) / real ( lbegin, kind = 8 )
temps(1) = aleft
do i = 2, lbegin
temps(i) = temps(i-1)+dx
end do
temps(lbegin+1) = aright
!
! Generate the required knots t(1),...,t(n+kpm).
!
call knots ( temps, lbegin, kpm, t, n )
nt = 1
!
! Generate the almost block diagonal coefficient matrix bloks and
! right side b from collocation equations and side conditions.
! then solve via slvblk , obtaining the b-representation of the
! approximation in T, A, N, KPM.
!
20 continue
call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b )
call slvblk ( bloks, integs, nbloks, b, itemps, a, iflag )
iter = 1
if ( itermx <= 1 ) then
go to 60
end if
!
! Save b-spline coefficients of current approx. in asave , then get new
! approx. and compare with old. if coefficients are more than relerr
! apart (relatively) or if number of iterations is less than itermx ,
! continue iterating.
!
30 continue
call bsplpp(t,a,n,kpm,templ,break,coef,l)
do i = 1, n
asave(i) = a(i)
end do
call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b )
call slvblk(bloks,integs,nbloks,b,itemps,a,iflag)
err = 0.0D+00
amax = 0.0D+00
do i = 1, n
amax = max ( amax, abs ( a(i) ) )
err = max ( err, abs ( a(i)-asave(i) ) )
end do
if ( err <= relerr*amax ) then
go to 60
end if
iter = iter + 1
if ( iter < itermx ) then
go to 30
end if
!
! Iteration (if any) completed. print out approx. based on current
! breakpoint sequence, then try to improve the sequence.
!
60 continue
write(*,70)kpm,l,n,(break(i),i=2,l)
70 format (' approximation from a space of splines of order',i3, &
' on ',i3,' intervals,'/' of dimension',i4,'. breakpoints -'/ &
(5e20.10))
if ( 0 < itermx ) then
write(*,*)' '
write(*,*)'Results on interation ',iter
end if
call bsplpp(t,a,n,kpm,templ,break,coef,l)
write ( *, * ) ' '
write ( *, * ) 'The piecewise polynomial representation of the approximation:'
write ( *, * ) ' '
do i = 1, l
ii = ( i - 1 ) * kpm
write(*,'(f9.3,e13.6,10e11.3)')break(i),(coef(ii+j),j=1,kpm)
end do
!
! The following call is provided here for possible further analysis
! of the approximation specific to the problem being solved.
! it is, of course, easily omitted.
!
call difequ ( 4, temps(1), temps )
if ( ntimes < nt ) then
return
end if
!
! From the pp-rep. of the current approx., obtain in NEWNOT a new
! (and possibly better) sequence of breakpoints, adding (on the
! average) ADDBRK breakpoints per pass through NEWNOT.
!
lnew = lbegin + int ( real ( nt, kind = 8 ) * addbrk )
if ( ncoef < lnew * kpm ) then
go to 120
end if
call newnot(break,coef,l,kpm,temps,lnew,templ)
call knots ( temps, lnew, kpm, t, n )
nt = nt+1
go to 20
120 continue
write(*,*)' '
write(*,*)'COLLOC - Fatal error!'
write(*,*)' The assigned dimension for COEF is ',ncoef
write(*,*)' but this is too small.'
stop
end
diff --git a/pppack/colpnt.f90 b/pppack/colpnt.f90
index 8bfb5ad..37ef621 100644
--- a/pppack/colpnt.f90
+++ b/pppack/colpnt.f90
@@ -1,117 +1,117 @@
!>
!> @file colpnt.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine colpnt ( k, rho )
!*************************************************************************
!
!! COLPNT supplies collocation points.
!
! Discussion:
!
! The collocation points are for the standard interval (-1,1) as the
! zeros of the Legendre polynomial of degree K, provided K <= 8.
!
! Otherwise, uniformly spaced points are given.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, integer K, the number of collocation points desired.
!
! Output, real ( kind = 8 ) RHO(K), the collocation points.
!
implicit none
integer k
integer j
real ( kind = 8 ) rho(k)
if ( k == 1 ) then
rho(1) = 0.0D+00
else if ( k == 2 ) then
rho(1) = -0.577350269189626D+00
rho(2) = 0.577350269189626D+00
else if ( k == 3 ) then
rho(1) = -0.774596669241483D+00
rho(2) = 0.0
rho(3) = 0.774596669241483D+00
else if ( k == 4 ) then
rho(1) = -0.861136311594053D+00
rho(2) = -0.339981043584856D+00
rho(3) = 0.339981043584856D+00
rho(4) = 0.861136311594053D+00
else if ( k == 5 ) then
rho(1) = -0.906179845938664D+00
rho(2) = -0.538469310105683D+00
rho(3) = 0.0D+00
rho(4) = 0.538469310105683D+00
rho(5) = 0.906179845938664D+00
else if ( k == 6 ) then
rho(1) = -0.932469514203152D+00
rho(2) = -0.661209386466265D+00
rho(3) = -0.238619186083197D+00
rho(4) = 0.238619186083197D+00
rho(5) = 0.661209386466265D+00
rho(6) = 0.932469514203152D+00
else if ( k == 7 ) then
rho(5) = 0.405845151377397D+00
rho(3) = -rho(5)
rho(6) = 0.741531185599394D+00
rho(2) = -rho(6)
rho(7) = 0.949107912342759D+00
rho(1) = -rho(7)
rho(4) = 0.0
else if ( k == 8 ) then
rho(5) = 0.183434642495650D+00
rho(4) = -rho(5)
rho(6) = 0.525532409916329D+00
rho(3) = -rho(6)
rho(7) = 0.796666477413627D+00
rho(2) = -rho(7)
rho(8) = 0.960289856497536D+00
rho(1) = -rho(8)
else
write ( *, * )' '
write ( *, * )'ColPnt - Warning!'
write ( *, * )' Equispaced collocation points will be used,'
write ( *, * )' because K =',k,' which is greater than 8.'
do j = 1, k
rho(j) = -1.0D+00 + 2.0D+00 * real ( j - 1, kind = 8 ) &
/ real ( k - 1, kind = 8 )
end do
end if
return
end
diff --git a/pppack/cspint.f90 b/pppack/cspint.f90
index fb201fb..0de7b13 100644
--- a/pppack/cspint.f90
+++ b/pppack/cspint.f90
@@ -1,214 +1,214 @@
!>
!> @file cspint.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine cspint ( ftab, xtab, ntab, a, b, y, e, work, result, ind )
!*************************************************************************
!
!! CSPINT estimates an integral using a spline interpolant.
!
! Discussion:
!
! CSPINT estimates the integral from A to B of F(X) by
! computing the natural spline S(X) that interpolates to F
! and integrating that exactly.
!
! F is supplied to the routine in the form of tabulated data.
!
! Other output from the program includes the definite integral
! from X(1) to X(I) of the spline, and the coefficients
! necessary for the user to evaluate the spline outside of
! this routine.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) FTAB(NTAB), contains the tabulated values
! of the functions, FTAB(I)=F(XTAB(I)).
!
! Input, real ( kind = 8 ) XTAB(NTAB), contains the points at
! which the function was evaluated. The XTAB's must be
! distinct and in ascending order.
!
! Input, integer NTAB, the number of entries in FTAB
! and XTAB. NTAB must be at least 3.
!
! Input, real ( kind = 8 ) A, lower limit of integration.
!
! Input, real ( kind = 8 ) B, upper limit of integration.
!
! Output, real ( kind = 8 ) Y(3,NTAB), will contain the coefficients
! of the interpolating natural spline over each subinterval.
!
! For XTAB(I) < = X <= XTAB(I+1),
!
! S(X) = FTAB(I) + Y(1,I)*(X-XTAB(I)) + Y(2,I)*(X-XTAB(I))**2
! + Y(3,I)*(X-XTAB(I))**3
!
! Output, real ( kind = 8 ) E(NTAB), E(I)=the definite integral
! from XTAB(1) to XTAB(I) of S(X).
!
! Workspace, real ( kind = 8 ) WORK(NTAB).
!
! Output, real ( kind = 8 ) RESULT, the estimated value of the integral.
!
! Output, integer IND, error flag.
! IND=0 if NTAB < 3 or the XTAB's are not distinct and in
! ascending order.
! IND=1 otherwise.
!
implicit none
integer ntab
real ( kind = 8 ) a
real ( kind = 8 ) b
real ( kind = 8 ) e(ntab)
real ( kind = 8 ) ftab(ntab)
integer i
integer ind
integer j
real ( kind = 8 ) r
real ( kind = 8 ) result
real ( kind = 8 ) s
real ( kind = 8 ) term
real ( kind = 8 ) u
real ( kind = 8 ) work(ntab)
real ( kind = 8 ) xtab(ntab)
real ( kind = 8 ) y(3,ntab)
ind = 0
if ( ntab < 3 ) then
write(*,*)' '
write(*,*)'CSPINT - Fatal error!'
write(*,*)' NTAB must be at least 3,'
write(*,*)' but your value was NTAB = ',ntab
stop
end if
do i = 1, ntab-1
if ( xtab(i+1) <= xtab(i) ) then
write(*,*)' '
write(*,*)'CSPINT - Fatal error!'
write(*,*)' Interval ',i,' is illegal.'
write(*,*)' XTAB(I) =',xtab(i)
write(*,*)' XTAB(I+1)=',xtab(i+1)
stop
end if
end do
s = 0.0D+00
do i = 1, ntab-1
r = ( ftab(i+1) - ftab(i) ) / ( xtab(i+1) - xtab(i) )
y(2,i) = r - s
s = r
end do
result = 0.0D+00
s = 0.0D+00
r = 0.0D+00
y(2,1) = 0.0D+00
y(2,ntab) = 0.0D+00
do i = 2, ntab-1
y(2,i) = y(2,i) + r * y(2,i-1)
work(i) = 2.0D+00 * ( xtab(i-1) - xtab(i+1) ) - r * s
s = xtab(i+1) - xtab(i)
r = s / work(i)
end do
do j = 2, ntab-1
i = ntab+1-j
y(2,i) = ((xtab(i+1)-xtab(i))*y(2,i+1)-y(2,i))/work(i)
end do
do i = 1, ntab-1
s = xtab(i+1) - xtab(i)
r = y(2,i+1) - y(2,i)
y(3,i) = r / s
y(2,i) = 3.0D+00 * y(2,i)
y(1,i) = ( ftab(i+1) - ftab(i) ) / s - ( y(2,i) + r ) * s
end do
e(1) = 0.0D+00
do i = 1, ntab-1
s = xtab(i+1) - xtab(i)
term = ( ( ( y(3,i) * 0.25D+00 * s &
+ y(2,i) / 3.0D+00 ) * s &
+ y(1,i) * 0.5D+00 ) * s + ftab(i) ) * s
e(i+1) = e(i) + term
end do
!
! Determine where the endpoints A and B lie in the mesh of XTAB's.
!
r = a
u = 1.0D+00
do j = 1, 2
if ( r <= xtab(1) ) then
result = result-u*((r-xtab(1))*y(1,1)* 0.5D+00 + ftab(1))*(r-xtab(1))
else if ( xtab(ntab) <= r ) then
result = result-u*(e(ntab)+(r-xtab(ntab))*(ftab(ntab) + 0.5D+00 * &
(ftab(ntab-1)+(xtab(ntab)-xtab(ntab-1))*y(1,ntab-1))*(r- &
xtab(ntab))))
else
do i = 1, ntab-1
if ( r <= xtab(i+1) ) then
r = r - xtab(i)
result = result-u*(e(i)+(((y(3,i)*0.25D+00*r+y(2,i)/3.0D+00)*r &
+y(1,i) * 0.5D+00 )*r+ftab(i))*r)
go to 100
end if
end do
end if
100 continue
u = -1.0D+00
r = b
end do
ind = 1
return
end
diff --git a/pppack/cubset.f90 b/pppack/cubset.f90
index 567b9de..9256b62 100644
--- a/pppack/cubset.f90
+++ b/pppack/cubset.f90
@@ -1,106 +1,106 @@
!>
!> @file cubset.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine cubset ( tau, c, n, ibcbeg, ibcend )
!*******************************************************************************
!
!! CUBSET sets up a simple cubic spline interpolant.
!
! WARNING: IBCBEG and IBCEND are not set up yet.
!
! A tridiagonal linear system for the unknown slopes S(I) of
! F at TAU(I), I=1,..., N, is generated and then solved by Gauss
! elimination, with S(I) ending up in C(2,I), for all I.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
! the data points. The entries of TAU are assumed to be
! strictly increasing.
!
! Input, integer N, the number of data points. N is
! assumed to be at least 2.
!
! Input/output, real ( kind = 8 ) C(4,N).
! On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1)
! or C(2,N) should have been set to the desired derivative
! values, as described further under IBCBEG and IBCEND.
! On output, C contains the polynomial coefficients of
! the cubic interpolating spline with interior knots
! TAU(2) through TAU(N-1).
! In the interval interval (TAU(I), TAU(I+1)), the spline
! F is given by F(X) =
! C(1,I) +
! C(2,I) * ( X - TAU(I) ) +
! C(3,I) * ( X - TAU(I) )**2 +
! C(4,I) * ( X - TAU(I) )**3
!
! IBCBEG,
! IBCEND Input, integer IBCBEG, IBCEND, boundary condition
! indicators.
!
! IBCBEG=0 means no boundary condition at TAU(1) is given.
! In this case, the "not-a-knot condition" is used. That
! is, the jump in the third derivative across TAU(2) is
! forced to zero. Thus the first and the second cubic
! polynomial pieces are made to coincide.
!
! IBCBEG=1 means that the slope at TAU(1) is to equal the
! input value C(2,1).
!
! IBCBEG=2 means that the second derivative at TAU(1) is
! to equal C(2,1).
!
! IBCEND=0, 1, or 2 has analogous meaning concerning the
! boundary condition at TAU(N), with the additional
! information taken from C(2,N).
!
implicit none
integer n
real ( kind = 8 ) c(4,n)
integer ibcbeg
integer ibcend
real ( kind = 8 ) tau(n)
!
! Solve for the slopes at internal nodes.
!
call cubslo ( tau, c, n )
!
! Now compute the quadratic and cubic coefficients used in the
! piecewise polynomial representation.
!
call spline_hermite_set ( n, tau, c )
return
end
diff --git a/pppack/cubslo.f90 b/pppack/cubslo.f90
index 659792d..ffe3736 100644
--- a/pppack/cubslo.f90
+++ b/pppack/cubslo.f90
@@ -1,111 +1,111 @@
!>
!> @file cubslo.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine cubslo ( tau, c, n )
!*******************************************************************************
!
!! CUBSLO solves for slopes defining a cubic spline.
!
! Discussion:
!
! A tridiagonal linear system for the unknown slopes S(I) of
! F at TAU(I), I=1,..., N, is generated and then solved by Gauss
! elimination, with S(I) ending up in C(2,I), for all I.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
! the data points. The entries of TAU are assumed to be
! strictly increasing.
!
! Input, integer N, the number of data points. N is
! assumed to be at least 2.
!
! Input/output, real ( kind = 8 ) C(4,N).
! On input, C(1,I) contains the function value at TAU(I),
! for I = 1 to N.
! C(2,1) contains the slope at TAU(1) and C(2,N) contains
! the slope at TAU(N).
! On output, the intermediate slopes at TAU(I) have been
! stored in C(2,I), for I = 2 to N-1.
!
implicit none
integer n
real ( kind = 8 ) c(4,n)
real ( kind = 8 ) g
integer i
integer ibcbeg
integer ibcend
real ( kind = 8 ) tau(n)
!
! Set up the right hand side of the linear system.
! C(2,1) and C(2,N) are presumably already set.
!
do i = 2, n-1
c(2,i) = 3.0D+00 * ( &
( tau(i) - tau(i-1) ) * ( c(1,i+1) - c(1,i) ) / ( tau(i+1) - tau(i) ) + &
( tau(i+1) - tau(i) ) * ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) )
end do
!
! Set the diagonal coefficients.
!
c(4,1) = 1.0D+00
do i = 2, n-1
c(4,i) = 2.0D+00 * ( tau(i+1) - tau(i-1) )
end do
c(4,n) = 1.0D+00
!
! Set the off-diagonal coefficients.
!
c(3,1) = 0.0D+00
do i = 2, n
c(3,i) = tau(i) - tau(i-1)
end do
!
! Forward elimination.
!
do i = 2, n-1
g = -c(3,i+1) / c(4,i-1)
c(4,i) = c(4,i) + g * c(3,i-1)
c(2,i) = c(2,i) + g * c(2,i-1)
end do
!
! Back substitution for the interior slopes.
!
do i = n-1, 2, -1
c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i)
end do
return
end
diff --git a/pppack/cubspl.f90 b/pppack/cubspl.f90
index ffbc652..3973860 100644
--- a/pppack/cubspl.f90
+++ b/pppack/cubspl.f90
@@ -1,282 +1,282 @@
!>
!> @file cubspl.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine cubspl ( tau, c, n, ibcbeg, ibcend )
!*******************************************************************************
!
!! CUBSPL defines an interpolatory cubic spline.
!
! Discussion:
!
! A tridiagonal linear system for the unknown slopes S(I) of
! F at TAU(I), I=1,..., N, is generated and then solved by Gauss
! elimination, with S(I) ending up in C(2,I), for all I.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
! the data points. The entries of TAU are assumed to be
! strictly increasing.
!
! Input, integer N, the number of data points. N is
! assumed to be at least 2.
!
! Input/output, real ( kind = 8 ) C(4,N).
! On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1)
! or C(2,N) should have been set to the desired derivative
! values, as described further under IBCBEG and IBCEND.
! On output, C contains the polynomial coefficients of
! the cubic interpolating spline with interior knots
! TAU(2) through TAU(N-1).
!
! In the interval interval (TAU(I), TAU(I+1)), the spline
! F is given by
!
! F(X) =
! C(1,I) +
! C(2,I) * H +
! C(3,I) * H**2 / 2 +
! C(4,I) * H**3 / 6.
!
! where H=X-TAU(I). The routine PPVALU may be used to
! evaluate F or its derivatives from TAU, C, L=N-1,
! and K=4.
!
! Input, integer IBCBEG, IBCEND, boundary condition indicators.
!
! IBCBEG=0 means no boundary condition at TAU(1) is given.
! In this case, the "not-a-knot condition" is used. That
! is, the jump in the third derivative across TAU(2) is
! forced to zero. Thus the first and the second cubic
! polynomial pieces are made to coincide.
!
! IBCBEG=1 means the slope at TAU(1) is to equal the
! input value C(2,1).
!
! IBCBEG=2 means the second derivative at TAU(1) is
! to equal C(2,1).
!
! IBCEND=0, 1, or 2 has analogous meaning concerning the
! boundary condition at TAU(N), with the additional
! information taken from C(2,N).
!
implicit none
integer n
real ( kind = 8 ) c(4,n)
real ( kind = 8 ) divdf1
real ( kind = 8 ) divdf3
real ( kind = 8 ) dtau
real ( kind = 8 ) g
integer i
integer ibcbeg
integer ibcend
real ( kind = 8 ) tau(n)
!
! C(3,*) and C(4,*) are used initially for temporary storage.
!
! Store first differences of the TAU sequence in C(3,*).
!
! Store first divided difference of data in C(4,*).
!
do i = 2, n
c(3,i) = tau(i) - tau(i-1)
end do
do i = 2, n
c(4,i) = ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) )
end do
!
! Construct the first equation from the boundary condition
! at the left endpoint, of the form:
!
! C(4,1)*S(1) + C(3,1)*S(2) = C(2,1)
!
! IBCBEG = 0: Not-a-knot
!
if ( ibcbeg == 0 ) then
if ( n <= 2 ) then
c(4,1) = 1.0D+00
c(3,1) = 1.0D+00
c(2,1) = 2.0D+00 * c(4,2)
go to 120
end if
c(4,1) = c(3,3)
c(3,1) = c(3,2) + c(3,3)
c(2,1) = ( ( c(3,2) + 2.0D+00 * c(3,1) ) * c(4,2) * c(3,3) &
+ c(3,2)**2 * c(4,3) ) / c(3,1)
!
! IBCBEG = 1: derivative specified.
!
else if ( ibcbeg == 1 ) then
c(4,1) = 1.0D+00
c(3,1) = 0.0D+00
if ( n == 2 ) then
go to 120
end if
!
! Second derivative prescribed at left end.
!
else
c(4,1) = 2.0D+00
c(3,1) = 1.0D+00
c(2,1) = 3.0D+00 * c(4,2) - c(3,2) / 2.0D+00 * c(2,1)
if ( n == 2 ) then
go to 120
end if
end if
!
! If there are interior knots, generate the corresponding
! equations and carry out the forward pass of Gauss elimination,
! after which the I-th equation reads:
!
! C(4,I) * S(I) + C(3,I) * S(I+1) = C(2,I).
!
do i = 2, n-1
g = -c(3,i+1) / c(4,i-1)
c(2,i) = g * c(2,i-1) + 3.0D+00 * ( c(3,i) * c(4,i+1) + c(3,i+1) * c(4,i) )
c(4,i) = g * c(3,i-1) + 2.0D+00 * ( c(3,i) + c(3,i+1))
end do
!
! Construct the last equation from the second boundary condition, of
! the form
!
! -G * C(4,N-1) * S(N-1) + C(4,N) * S(N) = C(2,N)
!
! If slope is prescribed at right end, one can go directly to
! back-substitution, since the C array happens to be set up just
! right for it at this point.
!
if ( ibcend == 1 ) then
go to 160
end if
if ( 1 < ibcend ) then
go to 110
end if
90 continue
!
! Not-a-knot and 3 <= N, and either 3 < N or also not-a-knot
! at left end point.
!
if ( n /= 3 .or. ibcbeg /= 0 ) then
g = c(3,n-1) + c(3,n)
c(2,n) = ( ( c(3,n) + 2.0D+00 * g ) * c(4,n) * c(3,n-1) + c(3,n)**2 &
* ( c(1,n-1) - c(1,n-2) ) / c(3,n-1) ) / g
g = - g / c(4,n-1)
c(4,n) = c(3,n-1)
c(4,n) = c(4,n) + g * c(3,n-1)
c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n)
go to 160
end if
!
! N=3 and not-a-knot also at left.
!
100 continue
c(2,n) = 2.0D+00 * c(4,n)
c(4,n) = 1.0D+00
g = -1.0D+00 / c(4,n-1)
c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n)
go to 160
!
! IBCEND = 2: Second derivative prescribed at right endpoint.
!
110 continue
c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n)
c(4,n) = 2.0D+00
g = -1.0D+00 / c(4,n-1)
c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
c(2,n) = ( g * c(2,n-1)+c(2,n))/c(4,n)
go to 160
!
! N = 2.
!
120 continue
if ( ibcend == 2 ) then
c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n)
c(4,n) = 2.0D+00
g = -1.0D+00 / c(4,n-1)
c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
c(2,n) = (g*c(2,n-1)+c(2,n)) / c(4,n)
else if ( ibcend == 0 .and. ibcbeg /= 0 ) then
c(2,n) = 2.0D+00 * c(4,n)
c(4,n) = 1.0D+00
g = -1.0D+00 / c(4,n-1)
c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
c(2,n) = (g*c(2,n-1)+c(2,n))/c(4,n)
else if ( ibcend == 0 .and. ibcbeg == 0 ) then
c(2,n) = c(4,n)
end if
!
! Back solve the upper triangular system
! C(4,I) * S(I) + C(3,I) * S(I+1) = B(I)
! for the slopes C(2,I), given that S(N) is already known.
!
160 continue
do i = n-1, 1, -1
c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i)
end do
!
! Generate cubic coefficients in each interval, that is, the
! derivatives at its left endpoint, from value and slope at its
! endpoints.
!
do i = 2, n
dtau = c(3,i)
divdf1 = ( c(1,i) - c(1,i-1) ) / dtau
divdf3 = c(2,i-1) + c(2,i) - 2.0D+00 * divdf1
c(3,i-1) = 2.0D+00 * ( divdf1 - c(2,i-1) - divdf3 ) / dtau
c(4,i-1) = 6.0D+00 * divdf3 / dtau**2
end do
return
end
diff --git a/pppack/cwidth.f90 b/pppack/cwidth.f90
index cdca694..cd8d383 100644
--- a/pppack/cwidth.f90
+++ b/pppack/cwidth.f90
@@ -1,351 +1,351 @@
!>
!> @file cwidth.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine cwidth ( w, b, nequ, ncols, integs, nbloks, d, x, iflag )
!*************************************************************************
!
!! CWIDTH solves an almost block diagonal linear system.
!
! Discussion:
!
! This routine is a variation of the theme in the algorithm bandet1
! by Martin and Wilkinson (numer.math. 9(1976)279-307). It solves
! the linear system
! A*X = B
! of NEQU equations in case A is almost block diagonal with all
! blocks having NCOLS columns using no more storage than it takes to
! store the interesting part of A. Such systems occur in the determ-
! ination of the b-spline coefficients of a spline approximation.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! w on input, a two-dimensional array of size (nequ,ncols) contain-
! ing the interesting part of the almost block diagonal coeffici-
! ent matrix a (see description and example below). the array
! integs describes the storage scheme.
! on output, w contains the upper triangular factor u of the
! lu factorization of a possibly permuted version of a . in par-
! ticular, the determinant of a could now be found as
! iflag*w(1,1)*w(2,1)* ... * w(nequ,1) .
!
! b on input, the right side of the linear system, of length nequ.
! the contents of b are changed during execution.
!
! Input, integer NEQU, the number of equations.
!
! Input, integer NCOLS, the block width, that is, the number of
! columns in each block.
!
! integs integer array, of size (2,nequ), describing the block
! structure of a .
! integs(1,i)=no. of rows in block i = nrow
! integs(2,i)=no. of elimination steps in block i
! =overhang over next block = last
! nbloks number of blocks
!
! d work array, to contain row sizes . if storage is scarce, the
! array x could be used in the calling sequence for d .
!
! x on output, contains computed solution (if iflag /= 0), of
! length nequ .
!
! iflag on output, integer
! =(-1)**(no.of interchanges during elimination)
! if a is invertible
! = 0 if a is singular
!
! block structure of a
!
! the interesting part of a is taken to consist of nbloks con-
! secutive blocks, with the i-th block made up of nrowi=integs(1,i)
! consecutive rows and ncols consecutive columns of a , and with
! the first lasti=integs(2,i) columns to the left of the next block.
! these blocks are stored consecutively in the workarray w .
!
! for example, here is an 11th order matrix and its arrangement in
! the workarray w . (the interesting entries of a are indicated by
! their row and column index modulo 10.)
!
! --- a --- --- w ---
!
! nrow1=3
! 11 12 13 14 11 12 13 14
! 21 22 23 24 21 22 23 24
! 31 32 33 34 nrow2=2 31 32 33 34
! last1=2 43 44 45 46 43 44 45 46
! 53 54 55 56 nrow3=3 53 54 55 56
! last2=3 66 67 68 69 66 67 68 69
! 76 77 78 79 76 77 78 79
! 86 87 88 89 nrow4=1 86 87 88 89
! last3=1 97 98 99 90 nrow5=2 97 98 99 90
! last4=1 08 09 00 01 08 09 00 01
! 18 19 10 11 18 19 10 11
! last5=4
!
! for this interpretation of a as an almost block diagonal matrix,
! we have nbloks=5 , and the integs array is
!
! i= 1 2 3 4 5
! k=
! integs(k,i)= 1 3 2 3 1 2
! 2 2 3 1 1 4
!
!
! Method:
!
! gauss elimination with scaled partial pivoting is used, but mult-
! ipliers are n o t s a v e d in order to save storage. rather, the
! right side is operated on during elimination. the two parameters
! i p v t e q and l a s t e q
! are used to keep track of the action. ipvteq is the index of the
! variable to be eliminated next, from equations ipvteq+1,...,lasteq,
! using equation ipvteq (possibly after an interchange) as the pivot
! equation. the entries in the pivot column are a l w a y s in column
! 1 of w . this is accomplished by putting the entries in rows
! ipvteq+1,...,lasteq revised by the elimination of the ipvteq-th
! variable one to the left in w . in this way, the columns of the
! equations in a given block (as stored in w ) will be aligned with
! those of the next block at the moment when these next equations be-
! come involved in the elimination process.
!
! thus, for the above example, the first elimination steps proceed
! as follows.
!
! *11 12 13 14 11 12 13 14 11 12 13 14 11 12 13 14
! *21 22 23 24 *22 23 24 22 23 24 22 23 24
! *31 32 33 34 *32 33 34 *33 34 33 34
! 43 44 45 46 43 44 45 46 *43 44 45 46 *44 45 46 etc.
! 53 54 55 56 53 54 55 56 *53 54 55 56 *54 55 56
! 66 67 68 69 66 67 68 69 66 67 68 69 66 67 68 69
! . . . .
!
! In all other respects, the procedure is standard, including the
! scaled partial pivoting.
!
implicit none
integer nbloks
integer ncols
integer nequ
real ( kind = 8 ) awi1od
real ( kind = 8 ) b(nequ)
real ( kind = 8 ) colmax
real ( kind = 8 ) d(nequ)
integer i
integer icount
integer iflag
integer ii
integer integs(2,nbloks)
integer ipvteq
integer ipvtp1
integer istar
integer j
integer jmax
integer lastcl
integer lasteq
integer lasti
integer nexteq
integer nrowad
real ( kind = 8 ) ratio
real ( kind = 8 ) rowmax
real ( kind = 8 ) sum1
real ( kind = 8 ) temp
real ( kind = 8 ) w(nequ,ncols)
real ( kind = 8 ) x(nequ)
iflag = 1
ipvteq = 0
lasteq = 0
!
! The I loop runs over the blocks.
!
do i = 1, nbloks
!
! The equations for the current block are added to those current-
! ly involved in the elimination process, by increasing lasteq
! by integs(1,i) after the rowsize of these equations has been
! recorded in the array D.
!
nrowad = integs(1,i)
do icount = 1, nrowad
nexteq = lasteq + icount
rowmax = 0.0D+00
do j = 1, ncols
rowmax = max ( rowmax, abs ( w(nexteq,j) ) )
end do
if ( rowmax == 0.0D+00 ) then
go to 150
end if
d(nexteq) = rowmax
end do
lasteq = lasteq + nrowad
!
! There will be lasti=integs(2,i) elimination steps before
! the equations in the next block become involved. further,
! l a s t c l records the number of columns involved in the cur-
! rent elimination step. it starts equal to ncols when a block
! first becomes involved and then drops by one after each elim-
! ination step.
!
lastcl = ncols
lasti = integs(2,i)
do icount = 1, lasti
ipvteq = ipvteq+1
if ( ipvteq < lasteq ) then
go to 30
end if
if ( d(ipvteq) < abs ( w(ipvteq,1)) + d(ipvteq) ) then
go to 100
end if
go to 150
!
! Determine the smallest ISTAR in (ipvteq,lasteq) for
! which abs(w(istar,1))/d(istar) is as large as possible, and
! interchange equations ipvteq and istar in case ipvteq
! < istar .
!
30 continue
colmax = abs(w(ipvteq,1)) / d(ipvteq)
istar = ipvteq
ipvtp1 = ipvteq+1
do ii = ipvtp1, lasteq
awi1od = abs(w(ii,1)) / d(ii)
if ( colmax < awi1od ) then
colmax = awi1od
istar = ii
end if
end do
if ( abs(w(istar,1))+d(istar) == d(istar) ) then
go to 150
end if
if ( istar == ipvteq ) then
go to 60
end if
iflag = -iflag
temp = d(istar)
d(istar) = d(ipvteq)
d(ipvteq) = temp
temp = b(istar)
b(istar) = b(ipvteq)
b(ipvteq) = temp
do j = 1, lastcl
temp = w(istar,j)
w(istar,j) = w(ipvteq,j)
w(ipvteq,j) = temp
end do
!
! Subtract the appropriate multiple of equation ipvteq from
! equations ipvteq+1,...,lasteq to make the coefficient of the
! ipvteq-th unknown (presently in column 1 of w ) zero, but
! store the new coefficients in w one to the left from the old.
!
60 continue
do ii = ipvtp1, lasteq
ratio = w(ii,1)/w(ipvteq,1)
do j = 2, lastcl
w(ii,j-1) = w(ii,j)-ratio*w(ipvteq,j)
end do
w(ii,lastcl) = 0.0D+00
b(ii) = b(ii)-ratio*b(ipvteq)
end do
lastcl = lastcl-1
end do
100 continue
end do
!
! At this point, W and B contain an upper triangular linear system
! equivalent to the original one, with w(i,j) containing entry
! (i, i-1+j ) of the coefficient matrix. solve this system by backsub-
! stitution, taking into account its block structure.
!
! i-loop over the blocks, in reverse order
!
i = nbloks
110 continue
lasti = integs(2,i)
jmax = ncols-lasti
do icount = 1, lasti
sum1 = 0.0D+00
do j = 1, jmax
sum1 = sum1 + x(ipvteq+j) * w(ipvteq,j+1)
end do
x(ipvteq) = ( b(ipvteq) - sum1 ) / w(ipvteq,1)
jmax = jmax+1
ipvteq = ipvteq-1
end do
i = i-1
if ( 0 < i ) then
go to 110
end if
return
150 continue
iflag = 0
return
end
diff --git a/pppack/difequ.f90 b/pppack/difequ.f90
index 51a61f3..48ef91e 100644
--- a/pppack/difequ.f90
+++ b/pppack/difequ.f90
@@ -1,185 +1,185 @@
!>
!> @file difequ.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine difequ ( mode, xx, v )
!*************************************************************************
!
!! DIFEQU returns information about a differential equation.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, integer MODE, an integer indicating the task to be performed.
! 1, initialization
! 2, evaluate de at xx
! 3, specify the next side condition
! 4, analyze the approximation
!
! Input, real ( kind = 8 ) XX, a point at which information is wanted
!
! Output, real ( kind = 8 ) V, depends on the MODE. See comments below
!
implicit none
integer, parameter :: npiece = 100
integer, parameter :: ncoef = 2000
real ( kind = 8 ) break
real ( kind = 8 ) coef
real ( kind = 8 ) eps
real ( kind = 8 ) ep1
real ( kind = 8 ) ep2
real ( kind = 8 ) error
real ( kind = 8 ) factor
integer i
integer iside
integer itermx
integer k
integer kpm
integer l
integer m
integer mode
real ( kind = 8 ) rho
real ( kind = 8 ) s2ovep
real ( kind = 8 ) solutn
real ( kind = 8 ) un
real ( kind = 8 ) v(20)
real ( kind = 8 ) value
real ( kind = 8 ) x
real ( kind = 8 ) xside
real ( kind = 8 ) xx
common /approx/ break(npiece),coef(ncoef),l,kpm
common /side/ m,iside,xside(10)
common /other/ itermx,k,rho(19)
!
! This sample of DIFEQU is for the example in chapter xv. It is a
! nonlinear second order two point boundary value problem.
!
go to (10,50,60,110), mode
!
! Initialize everything, Set the order M of the differential equation,
! the nondecreasing sequence xside(i),i=1,...,m, of points at which side
! conditions are given and anything else necessary.
!
10 continue
m = 2
xside(1) = 0.0D+00
xside(2) = 1.0D+00
!
! Print out heading.
!
write ( *, * ) ' '
write ( *, * ) ' Carrier''s nonlinear perturb. problem'
write ( *, * ) ' '
eps = 0.005D+00
write(*,*)'EPS = ',eps
!
! Set constants used in formula for solution below.
!
factor = ( sqrt ( 2.0D+00 ) + sqrt ( 3.0D+00 ) )**2
s2ovep = sqrt ( 2.0D+00 / eps )
!
! Initial guess for Newton iteration. un(x)=x*x-1.
!
l = 1
break(1) = 0.0D+00
do i = 1, kpm
coef(i) = 0.0D+00
end do
coef(1) = -1.0D+00
coef(3) = 2.0D+00
itermx = 10
return
!
! Provide value of left side coefficients and right side at xx .
! specifically, at xx the dif.equ. reads:
!
! v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2)
!
! in terms of the quantities v(i),i=1,...,m+2, to be computed here.
!
50 continue
v(3) = eps
v(2) = 0.0D+00
call ppvalu(break,coef,l,kpm,xx,0,un)
v(1) = 2.0D+00 * un
v(4) = un**2 + 1.0D+00
return
!
! provide the M side conditions. these conditions are of the form
! v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2)
! in terms of the quantities v(i),i=1,...,m+2, to be specified here.
! note that v(m+1)=0 for customary side conditions.
!
60 continue
v(m+1) = 0.0D+00
if ( iside == 1 ) then
v(2) = 1.0D+00
v(1) = 0.0D+00
v(4) = 0.0D+00
iside = iside+1
else if ( iside == 2 ) then
v(2) = 0.0D+00
v(1) = 1.0D+00
v(4) = 0.0D+00
iside = iside + 1
end if
return
!
! Calculate the error near the boundary layer at 1.
!
110 continue
write(*,*)' '
write(*,*)' X, G(X) and G(X)-F(X) at selected points:'
write(*,*)' '
x = 0.75D+00
do i = 1, 9
ep1 = exp ( s2ovep * ( 1.0D+00 - x ) ) * factor
ep2 = exp ( s2ovep * ( 1.0D+00 + x ) ) * factor
solutn = 12.0D+00 / ( 1.0D+00 + ep1 )**2 * ep1 &
+12.0D+00 / ( 1.0D+00 + ep2 )**2 * ep2 - 1.0D+00
call ppvalu(break,coef,l,kpm,x,0,value)
error = solutn-value
write ( *, '(1x,3g14.6)' ) x, solutn, error
x = x+0.03125
end do
return
end
diff --git a/pppack/dtblok.f90 b/pppack/dtblok.f90
index 03d5727..1327c65 100644
--- a/pppack/dtblok.f90
+++ b/pppack/dtblok.f90
@@ -1,103 +1,103 @@
!>
!> @file dtblok.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine dtblok ( bloks, integs, nbloks, ipivot, iflag, detsgn, detlog )
!*************************************************************************
!
!! DTBLOK gets the determinant of an almost block diagonal matrix.
!
! Discussion:
!
! The matrix's PLU factorization must have been obtained
! previously by FCBLOK.
!
! The logarithm of the determinant is computed instead of the
! determinant itself to avoid the danger of overflow or underflow
! inherent in this calculation.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! bloks, integs, nbloks, ipivot, iflag are as on return from fcblok.
! in particular, iflag=(-1)**(number of interchanges dur-
! ing factorization) if successful, otherwise iflag=0.
!
! detsgn on output, contains the sign of the determinant.
!
! detlog on output, contains the natural logarithm of the determi-
! nant if determinant is not zero. otherwise contains 0.
!
implicit none
integer nbloks
real ( kind = 8 ) bloks(1)
real ( kind = 8 ) detlog
real ( kind = 8 ) detsgn
integer i
integer iflag
integer index
integer indexp
integer integs(3,nbloks)
integer ip
integer ipivot(1)
integer k
integer last
integer nrow
detsgn = iflag
detlog = 0.0D+00
if ( iflag == 0 ) then
return
end if
index = 0
indexp = 0
do i = 1, nbloks
nrow = integs(1,i)
last = integs(3,i)
do k = 1, last
ip = index + nrow * (k-1) + ipivot(indexp+k)
detlog = detlog + log ( abs ( bloks(ip) ) )
detsgn = detsgn * sign ( 1.0D+00, bloks(ip) )
end do
index = nrow*integs(2,i)+index
indexp = indexp+nrow
end do
return
end
diff --git a/pppack/eqblok.f90 b/pppack/eqblok.f90
index 411f1de..4476458 100644
--- a/pppack/eqblok.f90
+++ b/pppack/eqblok.f90
@@ -1,192 +1,192 @@
!>
!> @file eqblok.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine eqblok ( t, n, kpm, work1, work2, bloks, lenblk, integs, &
nbloks, b )
!*************************************************************************
!
!! EQBLOK is to be called in COLLOC.
!
! Method:
!
! Each breakpoint interval gives rise to a block in the linear system.
! this block is determined by the K collocation equations in the interval
! with the side conditions (if any) in the interval interspersed ap-
! propriately, and involves the kpm b-splines having the interval in
! their support. correspondingly, such a block has nrow=k+isidel
! rows, with isidel=number of side conditions in this and the prev-
! ious intervals, and ncol=kpm columns.
!
! Further, because the interior knots have multiplicity k, we can
! carry out (in slvblk) k elimination steps in a block before pivot-
! ing might involve an equation from the next block. in the last block,
! of course, all kpm elimination steps will be carried out (in slvblk).
!
! see the detailed comments in the solveblok package for further in-
! formation about the almost block diagonal form used here.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! input
!
! Input, real ( kind = 8 ) T(N+KPM), the knot sequence.
!
! Input, integer N, the dimension of the approximating spline space,
! that is, the order of the linear system to be constructed.
!
! Input, integer KPM, = K + M, the order of the approximating spline.
!
! Input, integer LENBLK, the maximum length of the array BLOKS,
! as allowed by the dimension statement in COLLOC.
!
! work areas
!
! work1 used in putit, of size (kpm,kpm)
! work2 used in putit, of size (kpm,m+1)
!
! output
!
! bloks the coefficient matrix of the linear system, stored in al-
! most block diagonal form, of size
! kpm*sum(integs(1,i) , i=1,...,nbloks)
!
! integs an integer array, of size (3,nbloks), describing the block
! structure.
! integs(1,i) = number of rows in block i
! integs(2,i) = number of columns in block i
! integs(3,i) = number of elimination steps which can be
! carried out in block i before pivoting might
! bring in an equation from the next block.
!
! nbloks number of blocks, equals number of polynomial pieces
!
! b the right side of the linear system, stored corresponding to the
! almost block diagonal form, of size sum(integs(1,i) , i=1,...,
! nbloks).
!
implicit none
integer kpm
integer n
real ( kind = 8 ) b(*)
real ( kind = 8 ) bloks(*)
integer i
integer index
integer indexb
integer integs(3,*)
integer iside
integer isidel
integer itermx
integer k
integer left
integer lenblk
integer m
integer nbloks
integer nrow
real ( kind = 8 ) rho
real ( kind = 8 ) t(n+kpm)
real ( kind = 8 ) work1(kpm,kpm)
real ( kind = 8 ) work2(kpm,*)
real ( kind = 8 ) xside
common /side/ m,iside,xside(10)
common /other/ itermx,k,rho(19)
index = 1
indexb = 1
i = 0
iside = 1
do left = kpm, n, k
i = i + 1
!
! Determine integs(.,i)
!
integs(2,i) = kpm
if ( n <= left ) then
integs(3,i) = kpm
isidel = m
go to 30
end if
integs(3,i) = k
!
! At this point, iside-1 gives the number of side conditions
! incorporated so far. adding to this the side conditions in the
! current interval gives the number isidel .
!
isidel = iside - 1
do
if ( isidel == m ) then
exit
end if
if ( t(left+1) <= xside(isidel+1) ) then
exit
end if
isidel = isidel + 1
end do
30 continue
nrow = k + isidel
integs(1,i) = nrow
!
! The detailed equations for this block are generated and put
! together in PUTIT.
!
if ( lenblk < index + nrow * kpm - 1 ) then
write ( *, * ) ' '
write ( *, * ) 'EQBLOK - Fatal error!'
write ( *, * ) ' The dimension of BLOKS is too small.'
write ( *, * ) ' LENBLK = ', lenblk
stop
end if
call putit ( t, kpm, left, work1, work2, bloks(index), nrow, b(indexb) )
index = index + nrow * kpm
indexb = indexb + nrow
end do
nbloks = i
return
end
diff --git a/pppack/evnnot.f90 b/pppack/evnnot.f90
index 518bf00..bd553fc 100644
--- a/pppack/evnnot.f90
+++ b/pppack/evnnot.f90
@@ -1,85 +1,85 @@
!>
!> @file evnnot.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine evnnot ( break, coef, l, k, brknew, lnew, coefg )
!*************************************************************************
!
!! EVNNOT is a "fake" version of NEWNOT.
!
! Discussion:
!
! EVNNOT returns LNEW+1 knots in BRKNEW which are
! evenly spaced between BREAK(1) and BREAK(L+1).
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) BREAK(L+1), coef, l, k.....contains the
! pp-representation of a certain function F of order K. Specifically,
! d**(k-1)f(x)=coef(k,i) for break(i) <= x < break(i+1)
!
! Input, integer LNEW, the number of subintervals into which the interval
! (a,b) is to be sectioned by the new breakpoint sequence brknew .
!
! Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoints.
!
! Output, coefg the coefficient part of the pp-repr. break, coefg, l, 2
! for the monotone p.linear function G with respect to which brknew will
! be equidistributed.
!
implicit none
integer k
integer l
integer lnew
real ( kind = 8 ) break(l+1)
real ( kind = 8 ) brknew(lnew+1)
real ( kind = 8 ) coef(k,l)
real ( kind = 8 ) coefg(2,l)
integer i
if ( lnew == 0 ) then
brknew(1) = 0.5D+00 * ( break(1) + break(l+1) )
else
do i = 1, lnew+1
brknew(i) = ( real ( lnew - i + 1, kind = 8 ) * break(1) &
+ real ( i - 1, kind = 8 ) * break(l+1) ) &
/ real ( lnew, kind = 8 )
end do
end if
return
end
diff --git a/pppack/factrb.f90 b/pppack/factrb.f90
index 298312b..fa0a395 100644
--- a/pppack/factrb.f90
+++ b/pppack/factrb.f90
@@ -1,188 +1,188 @@
!>
!> @file factrb.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine factrb ( w, ipivot, d, nrow, ncol, last, iflag )
!*************************************************************************
!
!! FACTRB constructs a partial PLU factorization.
!
! Discussion:
!
! This factorization corresponds to steps 1 through LAST in Gauss
! elimination for the matrix W of order ( NROW, NCOL ), using
! pivoting of scaled rows.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input/output, real ( kind = 8 ) W(NROW,NCOL); on input, contains the
! matrix to be partially factored; on output, the partial factorization.
!
! Output, integer IPIVOT(NROW), contains a record of the pivoting
! strategy used; row IPIVOT(I) is used during the I-th elimination step,
! for I = 1, ..., LAST.
!
! Workspace, real ( kind = 8 ) D(NROW), used to store the maximum entry
! in each row.
!
! Input, integer NROW, the number of rows of W.
!
! Input, integer NCOL, the number of columns of W.
!
! Input, integer LAST, the number of elimination steps to be carried out.
!
! Input/output, integer IFLAG. On output, equals the input value
! times (-1)**(number of row interchanges during the factorization
! process), in case no zero pivot was encountered.
! Otherwise, iflag=0 on output.
!
implicit none
integer ncol
integer nrow
real ( kind = 8 ) awikdi
real ( kind = 8 ) colmax
real ( kind = 8 ) d(nrow)
integer i
integer iflag
integer ipivi
integer ipivk
integer ipivot(nrow)
integer j
integer k
integer kp1
integer last
real ( kind = 8 ) ratio
real ( kind = 8 ) rowmax
real ( kind = 8 ) w(nrow,ncol)
!
! Initialize IPIVOT and D.
!
do i = 1, nrow
ipivot(i) = i
end do
do i = 1, nrow
rowmax = 0.0D+00
do j = 1, ncol
rowmax = max ( rowmax, abs ( w(i,j) ) )
end do
if ( rowmax == 0.0D+00 ) then
iflag = 0
return
end if
d(i) = rowmax
end do
!
! Gauss elimination with pivoting of scaled rows, loop over k=1,.,last
!
k = 1
!
! As pivot row for k-th step, pick among the rows not yet used,
! i.e., from rows ipivot(k),...,ipivot(nrow), the one whose k-th
! entry (compared to the row size) is largest. then, if this row
! does not turn out to be row ipivot(k), redefine ipivot(k) ap-
! propriately and record this interchange by changing the sign
! of IFLAG.
!
30 continue
ipivk = ipivot(k)
if ( k == nrow ) then
if ( abs(w(ipivk,nrow))+d(ipivk) <= d(ipivk) ) then
iflag = 0
end if
return
end if
j = k
kp1 = k+1
colmax = abs(w(ipivk,k))/d(ipivk)
!
! Find the largest pivot
!
do i = kp1, nrow
ipivi = ipivot(i)
awikdi = abs(w(ipivi,k)) / d(ipivi)
if ( colmax < awikdi ) then
colmax = awikdi
j = i
end if
end do
if ( j /= k ) then
ipivk = ipivot(j)
ipivot(j) = ipivot(k)
ipivot(k) = ipivk
iflag = -iflag
end if
!
! If pivot element is too small in absolute value, declare
! matrix to be noninvertible and quit.
!
if ( abs(w(ipivk,k))+d(ipivk) <= d(ipivk) ) then
iflag = 0
return
end if
!
! Otherwise, subtract the appropriate multiple of the pivot
! row from remaining rows, i.e., the rows ipivot(k+1),...,
! ipivot(nrow), to make k-th entry zero. save the multiplier in
! its place.
!
do i = kp1, nrow
ipivi = ipivot(i)
w(ipivi,k) = w(ipivi,k)/w(ipivk,k)
ratio = -w(ipivi,k)
do j = kp1, ncol
w(ipivi,j) = ratio*w(ipivk,j)+w(ipivi,j)
end do
end do
k = kp1
!
! Check for having reached the next block.
!
if ( k <= last ) then
go to 30
end if
return
end
diff --git a/pppack/fcblok.f90 b/pppack/fcblok.f90
index 72f11d8..5a0c1d9 100644
--- a/pppack/fcblok.f90
+++ b/pppack/fcblok.f90
@@ -1,126 +1,126 @@
!>
!> @file fcblok.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine fcblok ( bloks, integs, nbloks, ipivot, scrtch, iflag )
!*************************************************************************
!
!! FCBLOK supervises the PLU factorization with pivoting of
! scaled rows of the almost block diagonal matrix.
!
! The almost block diagonal matrix is stored in the arrays
! BLOKS and INTEGS.
!
! The FACTRB routine carries out steps 1,...,last of gauss
! elimination (with pivoting) for an individual block.
!
! The SHIFTB routine shifts the remaining rows to the top of
! the next block
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! bloks an array that initially contains the almost block diagonal
! matrix a to be factored, and on return contains the com-
! puted factorization of a .
!
! integs an integer array describing the block structure of a .
!
! nbloks the number of blocks in a .
!
! ipivot an integer array of dimension sum (integs(1,n) ; n=1,
! ...,nbloks) which, on return, contains the pivoting stra-
! tegy used.
!
! scrtch work area required, of length max (integs(1,n) ; n=1,
! ...,nbloks).
!
! iflag output parameter;
! =0 in case matrix was found to be singular.
! otherwise,
! =(-1)**(number of row interchanges during factorization)
!
implicit none
integer nbloks
real ( kind = 8 ) bloks(*)
integer i
integer iflag
integer index
integer indexb
integer indexn
integer integs(3,nbloks)
integer ipivot(*)
integer last
integer ncol
integer nrow
real ( kind = 8 ) scrtch(*)
iflag = 1
indexb = 1
indexn = 1
i = 1
!
! Loop over the blocks. i is loop index
!
do
index = indexn
nrow = integs(1,i)
ncol = integs(2,i)
last = integs(3,i)
!
! Carry out elimination on the I-th block until next block
! enters, for columns 1 through LAST of I-th block.
!
call factrb ( bloks(index), ipivot(indexb), scrtch, nrow, ncol, &
last, iflag )
!
! Check for having reached a singular block or the last block.
!
if ( iflag == 0 .or. i == nbloks ) then
exit
end if
i = i + 1
indexn = nrow * ncol + index
!
! Put the rest of the I-th block onto the next block.
!
call shiftb ( bloks(index), ipivot(indexb), nrow, ncol, last, &
bloks(indexn), integs(1,i), integs(2,i) )
indexb = indexb + nrow
end do
return
end
diff --git a/pppack/interv.f90 b/pppack/interv.f90
index 60b9cb1..e2ed195 100644
--- a/pppack/interv.f90
+++ b/pppack/interv.f90
@@ -1,223 +1,223 @@
!>
!> @file interv.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine interv ( xt, lxt, x, left, mflag )
!*******************************************************************************
!
!! INTERV brackets a real value in an ascending vector of values.
!
! Discussion:
!
! The XT array is a set of increasing values. The goal of the routine
! is to determine the largest index I so that XT(I) <= X.
!
! The routine is designed to be efficient in the common situation
! that it is called repeatedly, with X taken from an increasing
! or decreasing sequence.
!
! This will happen when a piecewise polynomial is to be graphed.
! The first guess for LEFT is therefore taken to be the value
! returned at the previous call and stored in the local variable ILO.
!
! A first check ascertains that ILO < LXT. This is necessary
! since the present call may have nothing to do with the previous
! call. Then, if XT(ILO) < = X < XT(ILO+1), we set LEFT=ILO
! and are done after just three comparisons.
!
! Otherwise, we repeatedly double the difference ISTEP=IHI-ILO
! while also moving ILO and IHI in the direction of X, until
! XT(ILO) < = X < XT(IHI)
! after which we use bisection to get, in addition, ILO+1=IHI.
! LEFT=ILO is then returned.
!
! Modified:
!
! 05 February 2004
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) XT(LXT), a nondecreasing sequence of values.
!
! Input, integer LXT, the dimension of XT.
!
! Input, real ( kind = 8 ) X, the point whose location with
! respect to the sequence XT is to be determined.
!
! Output, integer LEFT, the index of the bracketing value:
! 1 if X < XT(1)
! I if XT(I) <= X < XT(I+1)
! LXT if XT(LXT) <= X
!
! Output, integer MFLAG, indicates whether X lies within the
! range of the data.
! -1: X < XT(1)
! 0: XT(I) <= X < XT(I+1)
! +1: XT(LXT) <= X
!
implicit none
integer lxt
integer left
integer mflag
integer ihi
integer, save :: ilo = 1
integer istep
integer middle
real ( kind = 8 ) x
real ( kind = 8 ) xt(lxt)
!$omp threadprivate(ilo)
ihi = ilo + 1
if ( lxt <= ihi ) then
if ( xt(lxt) <= x ) then
go to 110
end if
if ( lxt <= 1 ) then
mflag = -1
left = 1
return
end if
ilo = lxt - 1
ihi = lxt
end if
if ( xt(ihi) <= x ) then
go to 40
end if
if ( xt(ilo) <= x ) then
mflag = 0
left = ilo
return
end if
!
! Now X < XT(ILO). Decrease ILO to capture X.
!
istep = 1
31 continue
ihi = ilo
ilo = ihi - istep
if ( 1 < ilo ) then
if ( xt(ilo) <= x ) then
go to 50
end if
istep = istep * 2
go to 31
end if
ilo = 1
if ( x < xt(1) ) then
mflag = -1
left = 1
return
end if
go to 50
!
! Now XT(IHI) <= X. Increase IHI to capture X.
!
40 continue
istep = 1
41 continue
ilo = ihi
ihi = ilo + istep
if ( ihi < lxt ) then
if ( x < xt(ihi) ) then
go to 50
end if
istep = istep * 2
go to 41
end if
if ( xt(lxt) <= x ) then
go to 110
end if
!
! Now XT(ILO) < = X < XT(IHI). Narrow the interval.
!
ihi = lxt
50 continue
do
middle = ( ilo + ihi ) / 2
if ( middle == ilo ) then
mflag = 0
left = ilo
return
end if
!
! It is assumed that MIDDLE = ILO in case IHI = ILO+1.
!
if ( xt(middle) <= x ) then
ilo = middle
else
ihi = middle
end if
end do
!
! Set output and return.
!
110 continue
mflag = 1
if ( x == xt(lxt) ) then
mflag = 0
end if
do left = lxt, 1, -1
if ( xt(left) < xt(lxt) ) then
return
end if
end do
return
end
diff --git a/pppack/knots.f90 b/pppack/knots.f90
index c7be744..574b793 100644
--- a/pppack/knots.f90
+++ b/pppack/knots.f90
@@ -1,103 +1,103 @@
!>
!> @file knots.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine knots ( break, l, kpm, t, n )
!*************************************************************************
!
!! KNOTS is to be called in COLLOC.
!
! Discussion:
!
! From the given breakpoint sequence BREAK the routine constructs the
! knot sequence T so that
!
! SPLINE(K+M,T) = PP(K+M,BREAK)
!
! with M-1 continuous derivatives. This means that
!
! t(1),...,t(n+kpm) = break(1) kpm times, then break(2),...,
! break(l) each k times, then, finally, break(l+1) kpm times.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence.
!
! Input, integer L, the number of intervals or pieces.
!
! Input, integer KPM, = K+M, the order of the piecewise polynomial
! function or spline.
!
! Output, real ( kind = 8 ) T(N+KPM), the knot sequence.
!
! Output, integer N, = L*K+M = the dimension of SPLINE(K+M,T).
!
implicit none
integer kpm
integer l
integer n
real ( kind = 8 ) break(l+1)
integer iside
integer j
integer jj
integer jjj
integer k
integer ll
integer m
real ( kind = 8 ) t(*)
real ( kind = 8 ) xside
common /side/ m,iside,xside(10)
k = kpm-m
n = l*k+m
jj = n+kpm
jjj = l+1
do ll = 1, kpm
t(jj) = break(jjj)
jj = jj-1
end do
do j = 1, l
jjj = jjj-1
do ll = 1, k
t(jj) = break(jjj)
jj = jj-1
end do
end do
t(1:kpm) = break(1)
return
end
diff --git a/pppack/l2appr.f90 b/pppack/l2appr.f90
index 861d3bd..58a6c23 100644
--- a/pppack/l2appr.f90
+++ b/pppack/l2appr.f90
@@ -1,196 +1,196 @@
!>
!> @file l2appr.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine l2appr ( t, n, k, q, diag, bcoef )
!*************************************************************************
!
!! L2APPR constructs a weighted L2 spline approximation to given data.
!
! Discussion:
!
! The routine constructs the weighted discrete L2-approximation by
! splines of order K with knot sequence T(1), ..., T(n+k) to
! given data points ( TAU(1:NTAU), GTAU(1:NTAU) ).
!
! The B-spline coefficients BCOEF of the approximating spline are
! determined from the normal equations using Cholesky's method.
!
! Method:
!
! The B-spline coefficients of the L2-approximation are determined as the
! solution of the normal equations
!
! sum ( (b(i), b(j) ) * bcoef(j) : j=1,...,n) =(b(i),g),
! i=1, ..., n .
! Here, b(i) denotes the i-th B-spline, G denotes the function to
! be approximated, and the inner product of two functions F and G
! is given by
!
! (f,g) := sum ( f(tau(i))*g(tau(i))*weight(i) : i=1,...,ntau) .
!
! The arrays TAU and WEIGHT are given in common block
! DATA, as is the array GTAU containing the sequence
! g(tau(i)), i=1,..., NTAU.
!
! The relevant function values of the B-splines b(i), i=1,...,n, are
! supplied by the subprogram BSPLVB.
!
! The coefficient matrix C, with
! c(i,j) := (b(i), b(j)), i,j=1,...,n,
! of the normal equations is symmetric and (2*k-1)-banded, therefore
! can be specified by giving its K bands at or below the diagonal.
! For i=1,...,n, we store
! (b(i),b(j)) = c(i,j) in q(i-j+1,j), j=i,...,min(i+k-1,n)
! and the right side
! (b(i), g ) in bcoef(i).
!
! Since B-spline values are most efficiently generated by finding sim-
! ultaneously the value of every nonzero B-spline at one point,
! the entries of C (i.e., of Q ), are generated by computing, for
! each ll, all the terms involving tau(ll) simultaneously and adding
! them to all relevant entries.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, real ( kind = 8 ) T(N+K), the knot sequence.
!
! Input, integer N, the dimension of the space of splines of order K
! with knots t.
!
! Input, integer K, the order.
!
! work arrays
!
! q....a work array of size (at least) k*n. its first k rows are used
! for the k lower diagonals of the gramian matrix c.
!
! diag.....a work array of length n used in bchfac .
!
! input via c o m m o n /data/
!
! ntau.....number of data points
! (tau(i),gtau(i)), i=1,...,ntau are the ntau data points to be
! fitted .
! weight(i), i=1,...,ntau are the corresponding weights .
!
! output
! bcoef(1), ..., bcoef(n) the b-spline coefficients of the l2-appr.
!
implicit none
integer k
integer n
integer, parameter :: ntmax = 200
real ( kind = 8 ) bcoef(n)
real ( kind = 8 ) biatx(k)
real ( kind = 8 ) diag(n)
real ( kind = 8 ) dw
real ( kind = 8 ) gtau
integer i
integer j
integer jj
integer left
integer leftmk
integer ll
integer mm
integer ntau
real ( kind = 8 ) q(k,n)
real ( kind = 8 ) t(n+k)
real ( kind = 8 ) tau
real ( kind = 8 ) totalw
real ( kind = 8 ) weight
COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau
bcoef(1:n) = 0.0D+00
q(1:k,1:n) = 0.0D+00
left = k
leftmk = 0
do ll = 1, ntau
!
! Locate LEFT such that tau(ll) in (t(left),t(left+1)).
!
do
if ( left == n ) then
exit
end if
if ( tau(ll) < t(left+1) ) then
exit
end if
left = left + 1
leftmk = leftmk + 1
end do
call bsplvb ( t, k, 1, tau(ll), left, biatx )
!
! biatx(mm) contains the value of b(left-k+mm) at tau(ll).
! hence, with dw := biatx(mm)*weight(ll), the number dw*gtau(ll)
! is a summand in the inner product
! (b(left-k+mm), g) which goes into bcoef(left-k+mm)
! and the number biatx(jj)*dw is a summand in the inner product
! (b(left-k+jj), b(left-k+mm)), into q(jj-mm+1,left-k+mm)
! since (left-k+jj)-(left-k+mm)+1 = jj - mm + 1 .
!
do mm = 1, k
dw = biatx(mm)*weight(ll)
j = leftmk+mm
bcoef(j) = dw*gtau(ll)+bcoef(j)
i = 1
do jj = mm, k
q(i,j) = biatx(jj)*dw+q(i,j)
i = i+1
end do
end do
end do
!
! Construct the Cholesky factorization for C in q , then
! use it to solve the normal equations
! c*x = bcoef
! for X, and store X in BCOEF.
!
call bchfac ( q, k, n, diag )
call bchslv ( q, k, n, bcoef )
return
end
diff --git a/pppack/l2err.f90 b/pppack/l2err.f90
index 7449380..e25b7ab 100644
--- a/pppack/l2err.f90
+++ b/pppack/l2err.f90
@@ -1,146 +1,146 @@
!>
!> @file l2err.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine l2err ( iprfun, ftau, error )
!*************************************************************************
!
!! L2ERR computes the errors of an L2 approximation.
!
! Discussion:
!
! This routine computes various errors of the current L2-approximation,
! whose piecewise polynomial representation is contained in common
! block APPROX, to the given data contained in common block data.
!
! It prints out the average error errl1, the l2-error errl2, and the
! maximum error errmax.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, integer IPRFUN. If iprfun= 1, the routine prints out
! the value of the approximation as well as its error at
! every data point.
!
! Output, real ( kind = 8 ) FTAU(NTAU), contains the value of the computed
! approximation at each value TAU(1:NTAU).
!
! Output, error(1), ..., error(ntau), with error(i)=scale*(g-f)
! at tau(i), all i. here, SCALE equals 1. in case
! iprfun /= 1 , or the absolute error is greater than 100 some-
! where. otherwise, SCALE is such that the maximum of
! abs(error)) over all I lies between 10 and 100. This
! makes the printed output more illustrative.
!
implicit none
integer, parameter :: lpkmax = 100
integer, parameter :: ntmax = 200
integer, parameter :: ltkmax = 2000
integer ntau
real ( kind = 8 ) break
real ( kind = 8 ) coef
real ( kind = 8 ) err
real ( kind = 8 ) errl1
real ( kind = 8 ) errl2
real ( kind = 8 ) errmax
real ( kind = 8 ) error(ntau)
real ( kind = 8 ) ftau(ntau)
real ( kind = 8 ) gtau
integer ie
integer iprfun
integer k
integer l
integer ll
real ( kind = 8 ) scale
real ( kind = 8 ) tau
real ( kind = 8 ) totalw
real ( kind = 8 ) weight
COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau
common /approx/ break(lpkmax),coef(ltkmax),l,k
errl1 = 0.0D+00
errl2 = 0.0D+00
errmax = 0.0D+00
do ll = 1, ntau
call ppvalu(break,coef,l,k,tau(ll),0,ftau(ll))
error(ll) = gtau(ll)-ftau(ll)
err = abs(error(ll))
if ( errmax < err ) then
errmax = err
end if
errl1 = errl1 + err * weight(ll)
errl2 = errl2 + err**2 * weight(ll)
end do
errl1 = errl1 / totalw
errl2 = sqrt ( errl2 / totalw )
write ( *, * ) ' '
write ( *, * ) ' Least square error =',errl2
write ( *, * ) ' Average error =',errl1
write ( *, * ) ' Maximum error =',errmax
write ( *, * ) ' '
if ( iprfun /= 1 ) then
return
end if
!
! Scale error curve and print
!
ie = 0
scale = 1.0D+00
if ( errmax < 10.0D+00 ) then
do ie = 1, 9
scale = scale * 10.0D+00
if ( 10.0D+00 <= errmax * scale ) then
exit
end if
end do
end if
error(1:ntau) = error(1:ntau) * scale
write(*,60) ie, (ll,tau(ll),ftau(ll),error(ll),ll=1,ntau)
60 format (///14x,'approximation and scaled error curve'/ &
7x,'data point',7x,'approximation',3x,'deviation x 10**',i1/ &
(i4, f16.8,f16.8,f17.6))
return
end
diff --git a/pppack/l2knts.f90 b/pppack/l2knts.f90
index 6c064a6..c771c16 100644
--- a/pppack/l2knts.f90
+++ b/pppack/l2knts.f90
@@ -1,83 +1,83 @@
!>
!> @file l2knts.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine l2knts ( break, l, k, t, n )
!*************************************************************************
!
!! L2KNTS converts breakpoints to knots.
!
! Discussion:
!
! The breakpoint sequence BREAK is converted into a corresponding
! knot sequence T to allow the representation of a piecewise
! polynomial function of order K with K-2 continuous derivatives
! as a spline of order K with knot sequence T.
!
! This means that
! t(1), ..., t(n+k)= break(1) k times, then break(i), i=2,...,l, each
! once, then break(l+1) k times. Therefore, n=k-1+l.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, integer K, the order.
!
! Input, integer L, the number of polynomial pieces.
!
! Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence.
!
! Output, real ( kind = 8 ) T(N+K), the knot sequence.
!
! Output, integer N, the dimension of the corresponding spline space
! of order K.
!
implicit none
integer k
integer l
integer n
real ( kind = 8 ) break(l+1)
integer i
real ( kind = 8 ) t(k-1+l+k)
t(1:k-1) = break(1)
do i = 1, l
t(k-1+i) = break(i)
end do
n = k-1+l
t(n+1:n+k) = break(l+1)
return
end
diff --git a/pppack/newnot.f90 b/pppack/newnot.f90
index f3c556a..436356e 100644
--- a/pppack/newnot.f90
+++ b/pppack/newnot.f90
@@ -1,206 +1,206 @@
!>
!> @file newnot.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see .
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran
!>
subroutine newnot ( break, coef, l, k, brknew, lnew, coefg )
!*************************************************************************
!
!! NEWNOT returns LNEW+1 knots which are equidistributed on (A,B).
!
! Discussion:
!
! (a,b) = (break(1),break(l+1)) with respect to a certain monotone
! function G related to the K-th root of the K-th derivative of the
! piecewise polynomial function F whose piecewise polynomial
! representation is contained in break, coef, l, k .
!
! method
!
! The K-th derivative of the given piecewise polynomial function F does
! not exist, except perhaps as a linear combination of delta functions.
! Nevertheless, we construct a piecewise constant function H with
! breakpoint sequence BREAK which is approximately proportional
! to abs(d**k(f)).
!
! Specifically, on (break(i), break(i+1)),
!
! abs(jump at break(i) of pc) abs(jump at break(i+1) of pc)
! h=-------------- + ----------------------------
! break(i+1)-break(i-1) break(i+2) - break(i)
!
! with pc the p.constant (k-1)st derivative of f .
! then, the p.linear function g is constructed as
!
! g(x) = integral of h(y)**(1/k) for y from a to x
!
! and its pp coefficients stored in coefg .
!
! then brknew is determined by
!
! brknew(i) = a+g**(-1)((i-1)*step) , i=1,...,lnew+1
!
! where step=g(b)/lnew and (a,b) = (break(1),break(l+1)).
!
! In the event that pc=d**(k-1)(f) is constant in (a,b) and
! therefore h=0 identically, brknew is chosen uniformly spaced.
!
! optional printed output
! coefg.....the pp coefficients of g are printed out if iprint is set
! positive in data statement below.
!
! Reference:
!
! Carl DeBoor,
! A Practical Guide to Splines,
! Springer Verlag.
!
! Parameters:
!
! Input, break, coef, l, k.....contains the pp-representation of a certain
! function f of order k . specifically,
! d**(k-1)f(x)=coef(k,i) for break(i) <= x < break(i+1)
!
! Input, lnew.....number of intervals into which the interval (a,b) is to be
! sectioned by the new breakpoint sequence brknew .
!
! Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoint sequence.
!
! Output, coefg.....the coefficient part of the pp-repr. break, coefg, l, 2
! for the monotone p.linear function g with respect to which brknew will
! be equidistributed.
!
implicit none
integer k
integer l
integer lnew
real ( kind = 8 ) break(l+1)
real ( kind = 8 ) brknew(lnew+1)
real ( kind = 8 ) coef(k,l)
real ( kind = 8 ) coefg(2,l)
real ( kind = 8 ) dif
real ( kind = 8 ) difprv
integer i
integer, save :: iprint = 0
integer j
real ( kind = 8 ) oneovk
real ( kind = 8 ) step
real ( kind = 8 ) stepi
brknew(1) = break(1)
brknew(lnew+1) = break(l+1)
!
! If G is constant, BRKNEW is uniform.
!
if ( l <= 1) then
step = (break(l+1)-break(1))/ real ( lnew, kind = 8 )
do i = 2, lnew
brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step
end do
return
end if
!
! Construct the continuous piecewise linear function G.
!
oneovk = 1.0D+00 / real ( k, kind = 8 )
coefg(1,1) = 0.0D+00
difprv = abs(coef(k,2)-coef(k,1))/(break(3)-break(1))
do i = 2, l
dif = abs(coef(k,i)-coef(k,i-1))/(break(i+1)-break(i-1))
coefg(2,i-1) = (dif+difprv)**oneovk
coefg(1,i) = coefg(1,i-1)+coefg(2,i-1)*(break(i)-break(i-1))
difprv = dif
end do
coefg(2,l) = ( 2.0D+00 * difprv )**oneovk
!
! step = g(b)/lnew
!
step=(coefg(1,l)+coefg(2,l)*(break(l+1)-break(l))) / real ( lnew, kind = 8 )
if ( 0 < iprint ) then
write(*,20)step,(i,coefg(1,i),coefg(2,i),i=1,l)
end if
20 format (' step =',e16.7/(i5,2e16.5))
!
! if G is constant, BRKNEW is uniform.
!
if ( step <= 0.0D+00 ) then
step = (break(l+1)-break(1)) / real ( lnew, kind = 8 )
do i = 2, lnew
brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step
end do
return
end if
!
! for i=2,...,lnew, construct brknew(i)=a+g**(-1)(stepi),
! with stepi=(i-1)*step . this requires inversion of the p.lin-
! ear function g . for this, j is found so that
! g(break(j)) <= stepi .le. g(break(j+1))
! and then
! brknew(i) = break(j)+(stepi-g(break(j)))/dg(break(j)) .
! the midpoint is chosen if dg(break(j))=0 .
!
j = 1
do i = 2, lnew
stepi = real ( i - 1, kind = 8 ) * step
do
if ( j == l ) then
exit
end if
if ( stepi <= coefg(1,j+1) ) then
exit
end if
j = j + 1
end do
if ( coefg(2,j) /= 0.0D+00 ) then
brknew(i) = break(j)+(stepi-coefg(1,j))/coefg(2,j)
else
brknew(i) = ( break(j) + break(j+1) ) / 2.0D+00
end if
end do
return
end
diff --git a/pppack/ppvalu.f90 b/pppack/ppvalu.f90
index b392355..71a60ee 100644
--- a/pppack/ppvalu.f90
+++ b/pppack/ppvalu.f90
@@ -1,134 +1,134 @@
!>
!> @file ppvalu.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
-!> spclibs is free software: you can redistribute it and/or modify it under
+!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
-!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see